human/dist/tfjs.esm.js

8527 lines
1.2 MiB
JavaScript
Raw Normal View History

2022-11-18 17:13:29 +01:00
/*
Human
homepage: <https://github.com/vladmandic/human>
author: <https://github.com/vladmandic>
*/
var SG=Object.create;var YC=Object.defineProperty;var IG=Object.getOwnPropertyDescriptor;var vG=Object.getOwnPropertyNames;var kG=Object.getPrototypeOf,NG=Object.prototype.hasOwnProperty;var Kt=(r,t)=>()=>(t||r((t={exports:{}}).exports,t),t.exports),qe=(r,t)=>{for(var e in t)YC(r,e,{get:t[e],enumerable:!0})},TG=(r,t,e,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of vG(t))!NG.call(r,n)&&n!==e&&YC(r,n,{get:()=>t[n],enumerable:!(o=IG(t,n))||o.enumerable});return r};var Lp=(r,t,e)=>(e=r!=null?SG(kG(r)):{},TG(t||!r||!r.__esModule?YC(e,"default",{value:r,enumerable:!0}):e,r));var W0=Kt((tle,V0)=>{V0.exports=kt;var ko=null;try{ko=new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([0,97,115,109,1,0,0,0,1,13,2,96,0,1,127,96,4,127,127,127,127,1,127,3,7,6,0,1,1,1,1,1,6,6,1,127,1,65,0,11,7,50,6,3,109,117,108,0,1,5,100,105,118,95,115,0,2,5,100,105,118,95,117,0,3,5,114,101,109,95,115,0,4,5,114,101,109,95,117,0,5,8,103,101,116,95,104,105,103,104,0,0,10,191,1,6,4,0,35,0,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,126,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,127,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,128,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,129,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,130,34,4,66,32,135,167,36,0,32,4,167,11])),{}).exports}catch(r){}function kt(r,t,e){this.low=r|0,this.high=t|0,this.unsigned=!!e}kt.prototype.__isLong__;Object.defineProperty(kt.prototype,"__isLong__",{value:!0});function Vr(r){return(r&&r.__isLong__)===!0}kt.isLong=Vr;var D0={},A0={};function Du(r,t){var e,o,n;return 
t?(r>>>=0,(n=0<=r&&r<256)&&(o=A0[r],o)?o:(e=Nt(r,(r|0)<0?-1:0,!0),n&&(A0[r]=e),e)):(r|=0,(n=-128<=r&&r<128)&&(o=D0[r],o)?o:(e=Nt(r,r<0?-1:0,!1),n&&(D0[r]=e),e))}kt.fromInt=Du;function No(r,t){if(isNaN(r))return t?Ru:To;if(t){if(r<0)return Ru;if(r>=M0)return z0}else{if(r<=-P0)return zr;if(r+1>=P0)return B0}return r<0?No(-r,t).neg():Nt(r%Xp|0,r/Xp|0,t)}kt.fromNumber=No;function Nt(r,t,e){return new kt(r,t,e)}kt.fromBits=Nt;var Ym=Math.pow;function pw(r,t,e){if(r.length===0)throw Error("empty string");if(r==="NaN"||r==="Infinity"||r==="+Infinity"||r==="-Infinity")return To;if(typeof t=="number"?(e=t,t=!1):t=!!t,e=e||10,e<2||36<e)throw RangeError("radix");var o;if((o=r.indexOf("-"))>0)throw Error("interior hyphen");if(o===0)return pw(r.substring(1),t,e).neg();for(var n=No(Ym(e,8)),s=To,a=0;a<r.length;a+=8){var i=Math.min(8,r.length-a),p=parseInt(r.substring(a,a+i),e);if(i<8){var u=No(Ym(e,i));s=s.mul(u).add(No(p))}else s=s.mul(n),s=s.add(No(p))}return s.unsigned=t,s}kt.fromString=pw;function As(r,t){return typeof r=="number"?No(r,t):typeof r=="string"?pw(r,t):Nt(r.low,r.high,typeof t=="boolean"?t:r.unsigned)}kt.fromValue=As;var F0=65536,YG=1<<24,Xp=F0*F0,M0=Xp*Xp,P0=M0/2,O0=Du(YG),To=Du(0);kt.ZERO=To;var Ru=Du(0,!0);kt.UZERO=Ru;var jp=Du(1);kt.ONE=jp;var L0=Du(1,!0);kt.UONE=L0;var uw=Du(-1);kt.NEG_ONE=uw;var B0=Nt(-1,2147483647,!1);kt.MAX_VALUE=B0;var z0=Nt(-1,-1,!0);kt.MAX_UNSIGNED_VALUE=z0;var zr=Nt(0,-2147483648,!1);kt.MIN_VALUE=zr;var de=kt.prototype;de.toInt=function(){return this.unsigned?this.low>>>0:this.low};de.toNumber=function(){return this.unsigned?(this.high>>>0)*Xp+(this.low>>>0):this.high*Xp+(this.low>>>0)};de.toString=function(t){if(t=t||10,t<2||36<t)throw RangeError("radix");if(this.isZero())return"0";if(this.isNegative())if(this.eq(zr)){var e=No(t),o=this.div(e),n=o.mul(e).sub(this);return o.toString(t)+n.toInt().toString(t)}else return"-"+this.neg().toString(t);for(var s=No(Ym(t,6),this.unsigned),a=this,i="";;){var 
p=a.div(s),u=a.sub(p.mul(s)).toInt()>>>0,c=u.toString(t);if(a=p,a.isZero())return c+i;for(;c.length<6;)c="0"+c;i=""+c+i}};de.getHighBits=function(){return this.high};de.getHighBitsUnsigned=function(){return this.high>>>0};de.getLowBits=function(){return this.low};de.getLowBi
`),z=F=>M.writeSync(2,F+`
`));var U=u.print||B,j=u.printErr||z;Object.assign(u,d),d=null,u.arguments&&(f=u.arguments),u.thisProgram&&(h=u.thisProgram),u.quit&&(g=u.quit);var q=4,Y=Atomics.load,J=Atomics.store,re=Atomics.compareExchange,ne;u.wasmBinary&&(ne=u.wasmBinary);var ee=u.noExitRuntime||!0;typeof WebAssembly!="object"&&Su("no native wasm support detected");var oe,ie,le=!1,be;function _e(F,V){F||Su(V)}var ve=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):void 0;function Fe(F,V,ue){V>>>=0;for(var Ee=V+ue,Be=V;F[Be]&&!(Be>=Ee);)++Be;if(Be-V>16&&F.buffer&&ve)return ve.decode(F.buffer instanceof SharedArrayBuffer?F.slice(V,Be):F.subarray(V,Be));for(var Le="";V<Be;){var ge=F[V++];if(!(ge&128)){Le+=String.fromCharCode(ge);continue}var Ne=F[V++]&63;if((ge&224)==192){Le+=String.fromCharCode((ge&31)<<6|Ne);continue}var Ft=F[V++]&63;if((ge&240)==224?ge=(ge&15)<<12|Ne<<6|Ft:ge=(ge&7)<<18|Ne<<12|Ft<<6|F[V++]&63,ge<65536)Le+=String.fromCharCode(ge);else{var no=ge-65536;Le+=String.fromCharCode(55296|no>>10,56320|no&1023)}}return Le}function Pe(F,V){return F>>>=0,F?Fe(o(),F,V):""}function st(F,V,ue,Ee){if(ue>>>=0,!(Ee>0))return 0;for(var Be=ue,Le=ue+Ee-1,ge=0;ge<F.length;++ge){var Ne=F.charCodeAt(ge);if(Ne>=55296&&Ne<=57343){var Ft=F.charCodeAt(++ge);Ne=65536+((Ne&1023)<<10)|Ft&1023}if(Ne<=127){if(ue>=Le)break;V[ue++>>>0]=Ne}else if(Ne<=2047){if(ue+1>=Le)break;V[ue++>>>0]=192|Ne>>6,V[ue++>>>0]=128|Ne&63}else if(Ne<=65535){if(ue+2>=Le)break;V[ue++>>>0]=224|Ne>>12,V[ue++>>>0]=128|Ne>>6&63,V[ue++>>>0]=128|Ne&63}else{if(ue+3>=Le)break;V[ue++>>>0]=240|Ne>>18,V[ue++>>>0]=128|Ne>>12&63,V[ue++>>>0]=128|Ne>>6&63,V[ue++>>>0]=128|Ne&63}}return V[ue>>>0]=0,ue-Be}function ct(F,V,ue){return st(F,o(),V,ue)}var Ge,lt,it,ht,gt,Mr,Mt,eo,rr;S&&(Ge=u.buffer);function Tt(F){Ge=F,u.HEAP8=lt=new Int8Array(F),u.HEAP16=ht=new Int16Array(F),u.HEAP32=Mr=new Int32Array(F),u.HEAPU8=it=new Uint8Array(F),u.HEAPU16=gt=new Uint16Array(F),u.HEAPU32=Mt=new Uint32Array(F),u.HEAPF32=eo=new Float32Array(F),u.HEAPF64=rr=new 
Float64Array(F)}var or=u.INITIAL_MEMORY||16777216;if(S)oe=u.wasmMemory,Ge=u.buffer;else if(u.wasmMemory)oe=u.wasmMemory;else if(oe=new WebAssembly.Memory({initial:or/65536,maximum:65536,shared:!0}),!(oe.buffer instanceof SharedArrayBuffer))throw j("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),C&&j("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)"),Error("bad memory");oe&&(Ge=oe.buffer),or=Ge.byteLength,Tt(Ge);var nr,to=[],ro=[],fr=[],Va=!1;function Lo(){return ee}function Ks(){if(u.preRun)for(typeof u.preRun=="function"&&(u.preRun=[u.preRun]);u.preRun.length;)tl(u.preRun.shift());nl(to)}function Xt(){Va=!0,!S&&nl(ro)}function Wa(){if(!S){if(u.postRun)for(typeof u.postRun=="function"&&(u.postRun=[u.postRun]);u.postRun.length;)m0(u.postRun.shift());nl(fr)}}function tl(F){to.unshift(F)}function rl(F){ro.unshift(F)}function m0(F){fr.unshift(F)}var vi=0,Rp=null,Ua=null;function by(F){vi++,u.monitorRunDependencies&&u.monitorRunDependencies(vi)}function bm(F){if(vi--,u.monitorRunDependencies&&u.monitorRunDependencies(vi),vi==0&&(Rp!==null&&(clearInterval(Rp),Rp=null),Ua)){var V=Ua;Ua=null,V()}}function Su(F){u.onAbort&&u.onAbort(F),F="Aborted("+F+")",j(F),le=!0,be=1,F+=". 
Build with -sASSERTIONS for more info.";var V=new WebAssembly.RuntimeError(F);throw l(V),V}var Cy="data:application/octet-stream;base64,";function Cm(F){return F.startsWith(Cy)}function Dp(F){return F.startsWith("file://")}var hr;hr="tfjs-backend-wasm-threaded-simd.wasm",Cm(hr)||(hr=_(hr));function wm(F){try{if(F==hr&&ne)return new Uint8Array(ne);if(D)return D(F);throw"both async and sync fetching of the wasm failed"}catch(V){Su(V)}}function wy(){if(!ne&&(x||b)){if(typeof fetch=="function"&&!Dp(hr))return fetch(hr,{credentials:"same-origin"}).then(function(F){if(!F.ok)throw"failed to load wasm binary file at '"+hr+"'";return F.arrayBuffer()}).catch(function(){return wm(hr)
");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"})};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}WasmBackendModuleThreadedSimd(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else 
if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};`});var LB=Kt((Wg,Hv)=>{var Gv=(()=>{var r=typeof document!="undefined"&&document.currentScript?document.currentScript.src:void 0;return typeof __filename!="undefined"&&(r=r||__filename),function(t){t=t||{};var e=typeof t!="undefined"?t:{},o,n;e.ready=new Promise(function(K,ae){o=K,n=ae});var s;typeof process!="undefined"&&process.listeners&&(s={uncaughtException:process.listeners("uncaughtException"),unhandledRejection:process.listeners("unhandledRejection")});var a=Object.assign({},e),i=[],p="./this.program",u=(K,ae)=>{throw ae},c=typeof window=="object",l=typeof importScripts=="function",m=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string",d="";function f(K){return e.locateFile?e.locateFile(K,d):d+K}var h,g,x,b;function C(K){if(K instanceof Rp)return;E("exiting due to exception: "+K)}if(m){var S=zv(),k=Vv();l?d=k.dirname(d)+"/":d=__dirname+"/",h=(K,ae)=>(K=Ks(K)?new URL(K):k.normalize(K),S.readFileSync(K,ae?void 0:"utf8")),x=K=>{var ae=h(K,!0);return ae.buffer||(ae=new Uint8Array(ae)),ae},g=(K,ae,$e)=>{K=Ks(K)?new URL(K):k.normalize(K),S.readFile(K,function(at,_t){at?$e(at):ae(_t.buffer)})},process.argv.length>1&&(p=process.argv[1].replace(/\\/g,"/")),i=process.argv.slice(2),process.on("uncaughtException",function(K){if(!(K instanceof Rp))throw K}),process.on("unhandledRejection",function(K){throw K}),u=(K,ae)=>{if(it())throw process.exitCode=K,ae;C(ae),process.exit(K)},e.inspect=function(){return"[Emscripten Module object]"}}else(c||l)&&(l?d=self.location.href:typeof 
document!="undefined"&&document.currentScript&&(d=document.currentScript.src),r&&(d=r),d.indexOf("blob:")!==0?d=d.substr(0,d.replace(/[?#].*/,"").lastIndexOf("/")+1):d="",h=K=>{var ae=new XMLHttpRequest;return ae.open("GET",K,!1),ae.send(null),ae.responseText},l&&(x=K=>{var ae=new XMLHttpRequest;return ae.open("GET",K,!1),ae.responseTy
2022-11-18 17:13:29 +01:00
`)),p.join(`
`)}function a4(r,t,e,o){let n=He(t),s=o[o.length-1],a=new Array(s).fill(0),i=t.length,p=e==="complex64"?xl(r):r;if(i>1)for(let u=0;u<n/s;u++){let c=u*s;for(let l=0;l<s;l++)a[l]=Math.max(a[l],gl(p[c+l],0,e).length)}return a}function gl(r,t,e){let o;return Array.isArray(r)?o=`${parseFloat(r[0].toFixed(dw))} + ${parseFloat(r[1].toFixed(dw))}j`:zo(r)?o=`'${r}'`:e==="bool"?o=Y0(r):o=parseFloat(r.toFixed(dw)).toString(),Nu(o,t)}function Y0(r){return r===0?"false":"true"}function Jm(r,t,e,o,n,s=!0){let a=e==="complex64"?2:1,i=t[0],p=t.length;if(p===0){if(e==="complex64"){let h=xl(r);return[gl(h[0],0,e)]}return e==="bool"?[Y0(r[0])]:[r[0].toString()]}if(p===1){if(i>j0){let g=hl*a,x=Array.from(r.slice(0,g)),b=Array.from(r.slice((i-hl)*a,i*a));return e==="complex64"&&(x=xl(x),b=xl(b)),["["+x.map((C,S)=>gl(C,n[S],e)).join(", ")+", ..., "+b.map((C,S)=>gl(C,n[i-hl+S],e)).join(", ")+"]"]}return["["+(e==="complex64"?xl(r):Array.from(r)).map((g,x)=>gl(g,n[x],e)).join(", ")+"]"]}let u=t.slice(1),c=o.slice(1),l=o[0]*a,m=[];if(i>j0){for(let h=0;h<hl;h++){let g=h*l,x=g+l;m.push(...Jm(r.slice(g,x),u,e,c,n,!1))}m.push("...");for(let h=i-hl;h<i;h++){let g=h*l,x=g+l;m.push(...Jm(r.slice(g,x),u,e,c,n,h===i-1))}}else for(let h=0;h<i;h++){let g=h*l,x=g+l;m.push(...Jm(r.slice(g,x),u,e,c,n,h===i-1))}let d=p===2?",":"";m[0]="["+(i>0?m[0]+d:"");for(let h=1;h<m.length-1;h++)m[h]=" "+m[h]+d;let f=`,
2022-11-20 22:20:02 +01:00
`;for(let h=2;h<p;h++)f+=`
`;return m[m.length-1]=" "+m[m.length-1]+"]"+(s?"":f),m}function xl(r){let t=[];for(let e=0;e<r.length;e+=2)t.push([r[e],r[e+1]]);return t}var tt=class{constructor(t,e,o){if(this.dtype=e,this.shape=t.slice(),this.size=He(t),o!=null){let n=o.length;$(n===this.size,()=>`Length of values '${n}' does not match the size inferred by the shape '${this.size}'.`)}if(e==="complex64")throw new Error("complex64 dtype TensorBuffers are not supported. Please create a TensorBuffer for the real and imaginary parts separately and call tf.complex(real, imag).");this.values=o||qm(e,this.size),this.strides=js(t)}set(t,...e){e.length===0&&(e=[0]),$(e.length===this.rank,()=>`The number of provided coordinates (${e.length}) must match the rank (${this.rank})`);let o=this.locToIndex(e);this.values[o]=t}get(...t){t.length===0&&(t=[0]);let e=0;for(let n of t){if(n<0||n>=this.shape[e]){let s=`Requested out of range element at ${t}. Buffer shape=${this.shape}`;throw new Error(s)}e++}let o=t[t.length-1];for(let n=0;n<t.length-1;++n)o+=this.strides[n]*t[n];return this.values[o]}locToIndex(t){if(this.rank===0)return 0;if(this.rank===1)return t[0];let e=t[t.length-1];for(let o=0;o<t.length-1;++o)e+=this.strides[o]*t[o];return e}indexToLoc(t){if(this.rank===0)return[];if(this.rank===1)return[t];let e=new Array(this.shape.length);for(let o=0;o<e.length-1;++o)e[o]=Math.floor(t/this.strides[o]),t-=e[o]*this.strides[o];return e[e.length-1]=t,e}get rank(){return this.shape.length}toTensor(){return Ps().makeTensor(this.values,this.shape,this.dtype)}},Ps=null,Zp=null,i4=null;function Q0(r){Ps=r}function Z0(r){Zp=r}function J0(r){i4=r}var mt=class{constructor(t,e,o,n){this.kept=!1,this.isDisposedInternal=!1,this.shape=t.slice(),this.dtype=e||"float32",this.size=He(t),this.strides=js(t),this.dataId=o,this.id=n,this.rankType=this.rank<5?this.rank.toString():"higher"}get rank(){return this.shape.length}async buffer(){let t=await this.data();return Zp.buffer(this.shape,this.dtype,t)}bufferSync(){return 
Zp.buffer(this.shape,this.dtype,this.dataSync())}async array(){let t=await this.data();return ku(this.shape,t,this.dtype==="complex64")}arraySync(){return ku(this.shape,this.dataSync(),this.dtype==="complex64")}async data(){this.throwIfDisposed();let t=Ps().read(this.dataId);if(this.dtype==="string"){let e=await t;try{return e.map(o=>Qp(o))}catch(o){throw new Error("Failed to decode the string bytes into utf-8. To get the original bytes, call tensor.bytes().")}}return t}dataToGPU(t){return this.throwIfDisposed(),Ps().readToGPU(this.dataId,t)}dataSync(){this.throwIfDisposed();let t=Ps().readSync(this.dataId);if(this.dtype==="string")try{return t.map(e=>Qp(e))}catch(e){throw new Error("Failed to decode the string bytes into utf-8. To get the original bytes, call tensor.bytes().")}return t}async bytes(){this.throwIfDisposed();let t=await Ps().read(this.dataId);return this.dtype==="string"?t:new Uint8Array(t.buffer)}dispose(){this.isDisposed||(this.kerasMask&&this.kerasMask.dispose(),Ps().disposeTensor(this),this.isDisposedInternal=!0)}get isDisposed(){return this.isDisposedInternal}throwIfDisposed(){if(this.isDisposed)throw new Error("Tensor is disposed.")}print(t=!1){return Zp.print(this,t)}clone(){return this.throwIfDisposed(),Zp.clone(this)}toString(t=!1){let e=this.dataSync();return X0(e,this.shape,this.dtype,t)}cast(t){return this.throwIfDisposed(),Zp.cast(this,t)}variable(t=!0,e,o){return this.throwIfDisposed(),Ps().makeVariable(this,t,e,o)}};Object.defineProperty(mt,Symbol.hasInstance,{value:r=>!!r&&r.data!=null&&r.dataSync!=null&&r.throwIfDisposed!=null});function fw(){return ml("Tensor",()=>mt)}fw();var ri=class extends mt{constructor(t,e,o,n){super(t.shape,t.dtype,t.dataId,n),this.trainable=e,this.name=o}assign(t){if(t.dtype!==this.dtype)throw new Error(`dtype of the new value (${t.dtype}) and previous value (${this.dtype}) must match`);if(!br(t.shape,this.shape))throw new Error(`shape of the new value (${t.shape}) and previous value (${this.shape}) must 
match`);Ps().disposeTensor(this),this.dataId=t.dataId,Ps().incRef(this,null)}dispose(){Ps().disposeVaria
with dtype ${s.dtype}. `)}),e.length===1)return Wr(e[0]);let o=e,n={axis:t};return T.runKernel(ta,o,n)}var yt=N({concat_:uH});function pH(r,t,e=!1,o=!1){let n=v(r,"a","matMul"),s=v(t,"b","matMul");[n,s]=Oe(n,s);let a={a:n,b:s},i={transposeA:e,transposeB:o};return T.runKernel(Zo,a,i)}var Ze=N({matMul_:pH});function cH(r){let e={x:v(r,"x","sigmoid","float32")};return T.runKernel(bs,e)}var $a=N({sigmoid_:cH});function lH(r,t,e){let o=v(r,"x","slice","string_or_numeric");if(o.rank===0)throw new Error("Slicing scalar is not possible");let n={x:o},s={begin:t,size:e};return T.runKernel(ha,n,s)}var Xe=N({slice_:lH});function mH(r){let e={x:v(r,"x","tanh","float32")};return T.runKernel($s,e)}var Il=N({tanh_:mH});function dH(r,t,e,o,n,s){let a=v(r,"forgetBias","basicLSTMCell"),i=v(t,"lstmKernel","basicLSTMCell"),p=v(e,"lstmBias","basicLSTMCell"),u=v(o,"data","basicLSTMCell"),c=v(n,"c","basicLSTMCell"),l=v(s,"h","basicLSTMCell"),m=yt([u,l],1),d=Ze(m,i),f=Ce(d,p),h=f.shape[0],g=f.shape[1]/4,x=[h,g],b=Xe(f,[0,0],x),C=Xe(f,[0,g],x),S=Xe(f,[0,g*2],x),k=Xe(f,[0,g*3],x),_=Ce(se($a(b),Il(C)),se(c,$a(Ce(a,S)))),E=se(Il(_),$a(k));return[_,E]}var Vk=N({basicLSTMCell_:dH});function fH(r,t,e){let o=v(r,"x","batchToSpaceND"),n=t.reduce((i,p)=>i*p);$(o.rank>=1+t.length,()=>`input rank is ${o.rank} but should be > than blockShape.length ${t.length}`),$(e.length===t.length,()=>`crops.length is ${e.length} but should be equal to blockShape.length ${t.length}`),$(o.shape[0]%n===0,()=>`input tensor batch is ${o.shape[0]} but is not divisible by the product of the elements of blockShape ${t.join(" * ")} === ${n}`);let s={x:o},a={blockShape:t,crops:e};return T.runKernel(Js,s,a)}var dd=N({batchToSpaceND_:fH});function Wk(r){let t;return r.rank===0||r.rank===1?t=W(r,[1,1,1,r.size]):r.rank===2?t=W(r,[1,1,r.shape[0],r.shape[1]]):r.rank===3?t=W(r,[1,r.shape[0],r.shape[1],r.shape[2]]):t=r,t}function hH(r,t,e,o,n,s){s==null&&(s=.001);let 
a=v(r,"x","batchNorm"),i=v(t,"mean","batchNorm"),p=v(e,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;o!=null&&(c=v(o,"offset","batchNorm")),$(i.rank===p.rank,()=>"Batch normalization gradient requires mean and variance to have equal ranks."),$(c==null||i.rank===c.rank,()=>"Batch normalization gradient requires mean and offset to have equal ranks."),$(u==null||i.rank===u.rank,()=>"Batch normalization gradient requires mean and scale to have equal ranks.");let m={x:Wk(a),scale:u,offset:c,mean:i,variance:p},d={varianceEpsilon:s},f=T.runKernel(In,m,d);return W(f,a.shape)}var ru=N({batchNorm_:hH});function gH(r,t,e,o,n,s){let a=v(r,"x","batchNorm"),i=v(t,"mean","batchNorm"),p=v(e,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;return o!=null&&(c=v(o,"offset","batchNorm")),$(a.rank===2,()=>`Error in batchNorm2D: x must be rank 2 but got rank ${a.rank}.`),$(i.rank===2||i.rank===1,()=>`Error in batchNorm2D: mean must be rank 2 or rank 1 but got rank ${i.rank}.`),$(p.rank===2||p.rank===1,()=>`Error in batchNorm2D: variance must be rank 2 or rank 1 but got rank ${p.rank}.`),u!=null&&$(u.rank===2||u.rank===1,()=>`Error in batchNorm2D: scale must be rank 2 or rank 1 but got rank ${u.rank}.`),c!=null&&$(c.rank===2||c.rank===1,()=>`Error in batchNorm2D: offset must be rank 2 or rank 1 but got rank ${c.rank}.`),ru(a,i,p,c,u,s)}var Uk=N({batchNorm2d_:gH});function xH(r,t,e,o,n,s){let a=v(r,"x","batchNorm"),i=v(t,"mean","batchNorm"),p=v(e,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;return o!=null&&(c=v(o,"offset","batchNorm")),$(a.rank===3,()=>`Error in batchNorm3D: x must be rank 3 but got rank ${a.rank}.`),$(i.rank===3||i.rank===1,()=>`Error in batchNorm3D: mean must be rank 3 or rank 1 but got rank ${i.rank}.`),$(p.rank===3||p.rank===1,()=>`Error in batchNorm3D: variance must be rank 3 or rank 1 but got rank ${p.rank}.`),u!=null&&$(u.rank===3||u.rank===1,()=>`Error in batchNorm3D: scale must be rank 3 or 
rank 1 but got rank ${u.rank}.`),c!=null&&$(c.rank===3||c.rank===1,()=>`Error in batchNorm3D: offset must be rank 3 or rank 1 but got rank ${c.rank}.`),ru(
2023-08-05 15:03:11 +02:00
${n} and ${t} for depthToSpace with input shape
${o.shape}`),$(s*t>=0,()=>`Negative dimension size caused by overflow when multiplying
${s} and ${t} for depthToSpace with input shape
${o.shape}`),$(a%(t*t)===0,()=>`Dimension size must be evenly divisible by ${t*t} but is ${a} for depthToSpace with input shape ${o.shape}`);let i={x:o},p={blockSize:t,dataFormat:e};return T.runKernel(ln,i,p)}var c2=N({depthToSpace_:VH});function WH(r,t,e,o,n="NHWC",s=[1,1],a){let i=v(r,"x","depthwiseConv2d","float32"),p=v(t,"filter","depthwiseConv2d","float32"),u=i,c=!1;i.rank===3&&(c=!0,u=W(i,[1,i.shape[0],i.shape[1],i.shape[2]])),$(u.rank===4,()=>`Error in depthwiseConv2d: input must be rank 4, but got rank ${u.rank}.`),$(p.rank===4,()=>`Error in depthwiseConv2d: filter must be rank 4, but got rank ${p.rank}.`);let l=n==="NHWC"?u.shape[3]:u.shape[1];$(l===p.shape[2],()=>`Error in depthwiseConv2d: number of input channels (${l}) must match the inChannels dimension in filter ${p.shape[2]}.`),Lt("depthwiseConv2d",o,a);let m={x:u,filter:p},d={strides:e,pad:o,dataFormat:n,dilations:s,dimRoundingMode:a},f=T.runKernel(mn,m,d);return c?W(f,[f.shape[1],f.shape[2],f.shape[3]]):f}var oc=N({depthwiseConv2d_:WH});function UH(r){let e={x:v(r,"x","diag")};return T.runKernel(oa,e)}var l2=N({diag_:UH});function GH(r,t,e,o,n=[1,1],s="NHWC"){let a=v(r,"x","dilation2d"),i=v(t,"filter","dilation2d");$(a.rank===3||a.rank===4,()=>`Error in dilation2d: input must be rank 3 or 4, but got rank ${a.rank}.`),$(i.rank===3,()=>`Error in dilation2d: filter must be rank 3, but got rank ${i.rank}.`),$(s==="NHWC",()=>`Error in dilation2d: Only NHWC is currently supported, but got dataFormat of ${s}`);let p=a,u=!1;a.rank===3&&(p=W(a,[1,a.shape[0],a.shape[1],a.shape[2]]),u=!0),$(p.shape[3]===i.shape[2],()=>`Error in dilation2d: input and filter must have the same depth: ${p.shape[3]} vs ${i.shape[2]}`);let c={x:p,filter:i},l={strides:e,pad:o,dilations:n},m=T.runKernel(dn,c,l);return u?W(m,[m.shape[1],m.shape[2],m.shape[3]]):m}var m2=N({dilation2d_:GH});var Sr={};qe(Sr,{assertAndGetBroadcastShape:()=>rt,getBroadcastDims:()=>d2,getReductionAxes:()=>gd});function d2(r,t){let e=r.length,o=[];for(let 
n=0;n<e;n++){let s=e-1-n,a=r[s]||1;(t[t.length-1-n]||1)>1&&a===1&&o.unshift(s)}return o}function gd(r,t){let e=[];for(let o=0;o<t.length;o++){let n=r[r.length-o-1],s=t.length-o-1,a=t[s];(n==null||n===1&&a>1)&&e.unshift(s)}return e}function rt(r,t){let e=Math.max(r.length,t.length),o=new Array(e);for(let n=0;n<e;n++){let s=r[r.length-n-1];s==null&&(s=1);let a=t[t.length-n-1];if(a==null&&(a=1),s===1)o[e-n-1]=a;else if(a===1)o[e-n-1]=s;else if(s!==a){let i=`Operands could not be broadcast together with shapes ${r} and ${t}.`;throw Error(i)}else o[e-n-1]=s}return o}function HH(r,t){let e=v(r,"a","equal","string_or_numeric"),o=v(t,"b","equal","string_or_numeric");[e,o]=Oe(e,o),rt(e.shape,o.shape);let n={a:e,b:o};return T.runKernel(xn,n)}var xd=N({equal_:HH});function KH(r,t,e){let o=v(t,"a","where"),n=v(e,"b","where"),s=v(r,"condition","where","bool"),a=rt(rt(s.shape,o.shape),n.shape),i=ou(s,a),p=ou(o,a),u=ou(n,a),c={condition:i,t:p,e:u};return T.runKernel(fa,c)}var co=N({where_:KH});function qH(r){let e={x:v(r,"x","zerosLike")};return T.runKernel(Sa,e)}var Gt=N({zerosLike_:qH});function jH(r,t){let e=v(r,"a","div"),o=v(t,"b","div");[e,o]=Oe(e,o);let n=je(e,o),s=Gt(n),a=xd(o,s);return co(a,s,n)}var f2=N({divNoNan_:jH});function XH(r,t){let e=v(r,"t1","dot"),o=v(t,"t2","dot");$((e.rank===1||e.rank===2)&&(o.rank===1||o.rank===2),()=>`Error in dot: inputs must all be rank 1 or 2, but got ranks ${e.rank} and ${o.rank}.`);let n=e.rank===1?e.size:e.shape[1],s=o.rank===1?o.size:o.shape[0];if($(n===s,()=>`Error in dot: inner dimensions of inputs must match, but got ${n} and ${s}.`),e.rank===1&&o.rank===1){let a=W(e,[1,-1]),i=W(o,[-1,1]),p=Ze(a,i);return W(p,[])}else if(e.rank===1&&o.rank===2){let a=W(e,[1,-1]),i=W(o,[o.shape[0],o.shape[1]]),p=Ze(a,i);return W(p,[p.size])}else if(e.rank===2&&o.rank===1){let a=W(o,[-1,1]),i=Ze(e,a);return W(i,[i.size])}else{let a=W(o,[o.shape[0],o.shape[1]]);return Ze(e,a)}}var h2=N({dot_:XH});function YH(r,...t){let 
e=t.map((n,s)=>v(n,`tensors${s}`,"einsum")),o={equation:r};return T.runKernel(Li,e,o)}var su=N({einsum_:YH});function
rank ${s.rank}.`),$(Ka(t),()=>`Error in localResponseNormalization: depthRadius must be an integer but got depthRadius ${t}.`);let a=s,i=!1;s.rank===3&&(i=!0,a=W(s,[1,s.shape[0],s.shape[1],s.shape[2]]));let p={x:a},u={depthRadius:t,bias:e,alpha:o,beta:n},c=T.runKernel(Bn,p,u);return i?W(c,[c.shape[1],c.shape[2],c.shape[3]]):c}var N2=N({localResponseNormalization_:EK});function RK(r){let e={x:v(r,"x","log","float32")};return T.runKernel(Fn,e)}var ui=N({log_:RK});function DK(r){let e={x:v(r,"x","log1p")};return T.runKernel(Pn,e)}var vd=N({log1p_:DK});function AK(r){return $(qs(r),()=>"The f passed in grad(f) must be a function"),(t,e)=>{let o=v(t,"x","tf.grad","string_or_numeric"),n=e!=null?v(e,"dy","tf.grad"):null;return T.tidy(()=>{let{value:s,grads:a}=T.gradients(()=>r(o),[o],n);return n!=null&&xt(s.shape,n.shape,"The shape of dy passed in grad(f)(x, dy) must match the shape returned by f(x)"),kd(a),a[0]})}}function FK(r){return $(qs(r),()=>"The f passed in grads(f) must be a function"),(t,e)=>{$(Array.isArray(t),()=>"The args passed in grads(f)(args) must be an array of `Tensor`s or `TensorLike`s");let o=ni(t,"args","tf.grads","string_or_numeric"),n=e!=null?v(e,"dy","tf.grads"):null;return T.tidy(()=>{let{value:s,grads:a}=T.gradients(()=>r(...o),o,n);return n!=null&&xt(s.shape,n.shape,"The shape of dy passed in grads(f)([x1,...], dy) must match the shape returned by f([x1,...])"),kd(a),a})}}function PK(r){return $(qs(r),()=>"The f passed in valueAndGrad(f) must be a function"),(t,e)=>{$(t instanceof mt,()=>"The x passed in valueAndGrad(f)(x) must be a tensor"),$(e==null||e instanceof mt,()=>"The dy passed in valueAndGrad(f)(x, dy) must be a tensor");let{grads:o,value:n}=T.gradients(()=>r(t),[t],e);return kd(o),{grad:o[0],value:n}}}function OK(r){return $(qs(r),()=>"The f passed in valueAndGrads(f) must be a function"),(t,e)=>{$(Array.isArray(t)&&t.every(n=>n instanceof mt),()=>"The args passed in valueAndGrads(f)(args) must be array of tensors"),$(e==null||e 
instanceof mt,()=>"The dy passed in valueAndGrads(f)(args, dy) must be a tensor");let o=T.gradients(()=>r(...t),t,e);return e!=null&&xt(o.value.shape,e.shape,"The shape of dy passed in valueAndGrads(f)([x1,...], dy) must match the shape returned by f([x1,...])"),kd(o.grads),o}}function zw(r,t){$(qs(r),()=>"The f passed in variableGrads(f) must be a function"),$(t==null||Array.isArray(t)&&t.every(u=>u instanceof ri),()=>"The varList passed in variableGrads(f, varList) must be an array of variables");let e=t!=null;if(!e){t=[];for(let u in T.registeredVariables)t.push(T.registeredVariables[u])}let o=e?t.filter(u=>!u.trainable):null,n=t.length;t=t.filter(u=>u.trainable),$(t.length>0,()=>`variableGrads() expects at least one of the input variables to be trainable, but none of the ${n} variables is trainable.`);let s=!0,{value:a,grads:i}=T.gradients(r,t,null,s);$(i.some(u=>u!=null),()=>"Cannot find a connection between any variable and the result of the loss function y=f(x). Please make sure the operations that use variables are inside the function f passed to minimize()."),$(a.rank===0,()=>`The f passed in variableGrads(f) must return a scalar, but it returned a rank-${a.rank} tensor`);let p={};return t.forEach((u,c)=>{i[c]!=null&&(p[u.name]=i[c])}),o!=null&&o.forEach(u=>p[u.name]=null),{value:a,grads:p}}function Ir(r){return T.customGrad(r)}function kd(r){if(r.filter(e=>e==null).length>0)throw new Error(`Cannot compute gradient of y=f(x) with respect to x. Make sure that
the f you passed encloses all operations that lead from x to y.`)}function MK(r){let e={x:v(r,"x","neg")};return T.runKernel(pa,e)}var pr=N({neg_:MK});function LK(r){let e={x:v(r,"x","softplus")};return T.runKernel(Cs,e)}var Nd=N({softplus_:LK});function BK(r){let t=v(r,"x","logSigmoid");return Ir(o=>({value:pr(Nd(pr(o))),gradFunc:a=>se(a,$a(pr(o)))}))(t)}var T2=N({logSigmoid_:BK});function zK(r,t){let e=v(r,"a","sub"),o=v(t,"b","sub");[e,o]=Oe(e,o);let n={a:e,b:o};return T.runKernel(Ts,n)}var Te=N({sub_:zK});function VK(r,t=-1){let e=v(r,"logits","logSoftmax");if(t===-1&&(t=e.rank-1),t!==e.rank-1)throw Error(`Log Softmax along a non-last dimension is not yet supported. Logits was rank ${e.rank} and axis was ${t}`);return Ir((n,s)=>{let i=Ra(n,t,!0),p=Te(n,i),u=Te(We(p,"float32"),ui(ot(_o(p),t,!0)));return s([u]),{value:u,gradFunc:(l,m)=>{let[d]=m,f=!0,h=_o(d);return Te(l,se(ot(l,t,f),h))}}})(e)}var _2=N({logSoftmax_:VK});function WK(r,t=null,e=!1){let o=v(r,"x","logSumExp"),n=Ti(t,o.shape),s=Ra(o,n,!0),a=Te(o,s),i=_o(a),p=ot(i,n),u=ui(p),c=Ce(W(s,u.shape),u);if(e){let l=ai(c.shape,n);return W(c,l)}return c}var Td=N({logSumExp_:WK});function UK(r,t){let e=v(r,"a","logicalAnd","bool"),o=v(t,"b","logicalAnd","bool");rt(e.shape,o.shape);let n={a:e,b:o};return T.runKernel(On,n)}var Vu=N({logicalAnd_:UK});function GK(r){let e={x:v(r,"x","logicalNot","bool")};return T.runKernel(Mn,e)}var _d=N({logicalNot_:GK});function HK(r,t){let e=v(r,"a","logicalOr","bool"),o=v(t,"b","logicalOr","bool");rt(e.shape,o.shape);let n={a:e,b:o};return T.runKernel(Ln,n)}var $d=N({logicalOr_:HK});function KK(r,t){let e=v(r,"a","logicalXor","bool"),o=v(t,"b","logicalXor","bool");return rt(e.shape,o.shape),Vu($d(r,t),_d(Vu(r,t)))}var $2=N({logicalXor_:KK});var Ed=2147483648;function qK(r,t,e="left"){let o=v(r,"sortedSequence","searchSorted"),n=v(t,"values","searchSorted"),s=o.shape[o.shape.length-1],a=n.shape[n.shape.length-1],i=W(o,[-1,s]),p=W(n,[-1,a]);if(i.rank<2)throw new Error("Sorted 
input argument must be at least 2-dimensional");if(i.shape[0]!==p.shape[0])throw new Error("Leading dimension of 'sortedSequence' and 'values' must match.");if(He(p.shape)>=Ed)throw new Error(`values tensor size must less than ${Ed}`);if(i.shape[1]>=Ed)throw new Error(`trailing dim_size must less than ${Ed} for int32 output type, was ${i.shape[1]}`);let u={sortedSequence:i,values:p},c={side:e};return T.runKernel(fs,u,c)}var Nl=N({searchSorted_:qK});function E2(r,t){return Nl(r,t,"left")}function jK(r,t,e,o,n){let s=v(r,"x","maxPool"),a=1,i=s,p=!1;s.rank===3&&(p=!0,i=W(s,[1,s.shape[0],s.shape[1],s.shape[2]])),$(i.rank===4,()=>`Error in maxPool: input must be rank 4 but got rank ${i.rank}.`),$(gr(e,a),()=>`Error in maxPool: Either strides or dilations must be 1. Got strides ${e} and dilations '${a}'`),Lt("maxPool",o,n);let u={x:i},c={filterSize:t,strides:e,pad:o,dimRoundingMode:n},l=T.runKernel(Wn,u,c);return p?W(l,[l.shape[1],l.shape[2],l.shape[3]]):l}var Rd=N({maxPool_:jK});function XK(r,t=[1,1,1],e,o,n,s="NDHWC"){let a=v(r,"x","maxPool3d"),i=a,p=!1;a.rank===4&&(p=!0,i=W(a,[1,a.shape[0],a.shape[1],a.shape[2],a.shape[3]])),$(i.rank===5,()=>`Error in maxPool3d: x must be rank 5 but got rank ${i.rank}.`),$(s==="NDHWC",()=>`Error in maxPool3d: Only NDHWC is currently supported, but got dataFormat of ${s}`),Lt("maxPool3d",o,n);let u={x:i},c={filterSize:t,strides:e,pad:o,dimRoundingMode:n,dataFormat:s},l=T.runKernel(ia,u,c);return p?W(l,[l.shape[1],l.shape[2],l.shape[3],l.shape[4]]):l}var R2=N({maxPool3d_:XK});function YK(r,t,e,o,n=!1){let a={x:v(r,"x","maxPoolWithArgmax")},i={filterSize:t,strides:e,pad:o,includeBatchInIndex:n},p=T.runKernel(ua,a,i);return{result:p[0],indexes:p[1]}}var D2=N({maxPoolWithArgmax_:YK});function QK(r,t){let e=v(r,"a","maximum"),o=v(t,"b","maximum");[e,o]=Oe(e,o),e.dtype==="bool"&&(e=We(e,"int32"),o=We(o,"int32")),rt(e.shape,o.shape);let n={a:e,b:o};return T.runKernel(Vn,n)}var Dd=N({maximum_:QK});function ZK(r,t=null,e=!1){let 
n={x:v(r,"x","mean")},s={axis:t,keepDims:e};return T.runKernel(Un,n,s)}var Wu=N({mean_:ZK});function Ur(r,t="flo
2022-11-18 17:13:29 +01:00
Actual: ${n}.
2023-08-05 15:03:11 +02:00
Expected: ${s}.`);for(let a=0;a<s.length;++a){let i=n[a],p=s[a];if(!e(i,p))throw new Error(`Arrays differ: actual[${a}] = ${i}, expected[${a}] = ${p}.
2022-11-18 17:13:29 +01:00
Actual: ${n}.
Expected: ${s}.`)}typeof expect!="undefined"&&expect().nothing()}
/* NOTE(review): the line above is the tail of a helper that starts before this excerpt. */

/**
 * Expects the promise returned by `r()` to reject: calls `t()` on rejection and `t.fail()` on fulfilment.
 * Calls `expect().nothing()` when a global `expect` exists.
 */
function Dq(r,t){r().then(()=>t.fail(),()=>t()),typeof expect!="undefined"&&expect().nothing()}
/**
 * Array equality check delegating to `jw`.
 * A primitive `t` (string/number/boolean) is wrapped in an array. When `zo` is true for either
 * argument or its first element, elements are compared with loose `==`; otherwise with `Xw` at tolerance 0.
 */
function Aq(r,t){let e=typeof t=="string"||typeof t=="number"||typeof t=="boolean"?[t]:t;return zo(r)||zo(r[0])||zo(t)||zo(t[0])?jw(r,e,(o,n)=>o==n):jw(r,t,(o,n)=>Xw(o,n,0))}
/**
 * Throws when `r` and `t` differ by more than `e`, as judged by `Xw`.
 * When `e` is null or undefined it is taken from `Md()`.
 */
function f1(r,t,e){if(e==null&&(e=Md()),!Xw(r,t,e))throw new Error(`Numbers differ: actual === ${r}, expected === ${t}`);typeof expect!="undefined"&&expect().nothing()}
/**
 * Tolerance comparison. Returns true when both values are non-finite (NaN or ±Infinity, in any combination).
 * Otherwise returns true only if neither is NaN and |r - t| <= e.
 */
function Xw(r,t,e){return!isFinite(r)&&!isFinite(t)?!0:!(isNaN(r)||isNaN(t)||Math.abs(r-t)>e)}
/** Throws on the first element of `r` that lies outside the closed range [t, e]. */
function Fq(r,t,e){for(let o=0;o<r.length;o++)if(r[o]<t||r[o]>e)throw new Error(`Value out of range:${r[o]} low: ${t}, high: ${e}`)}
/** Views both buffers as Float32Array and throws on a length mismatch or on the first differing element (strict !==). */
function Pq(r,t){let e=new Float32Array(r),o=new Float32Array(t);if(e.length!==o.length)throw new Error(`Expected ArrayBuffer to be of length ${o.length}, but it was ${e.length}`);for(let n=0;n<o.length;n++)if(e[n]!==o[n])throw new Error(`Expected ArrayBuffer value at ${n} to be ${o[n]} but got ${e[n]} instead`)}
/** Recursively replaces every non-array leaf of `r` with `Zi(leaf)`, in place, and returns `r`. */
function h1(r){for(let t=0;t<r.length;t++){let e=r[t];Array.isArray(e)?h1(e):r[t]=Zi(e)}return r}
/**
 * Creates a muted, looping, fixed-position <video> element, appends `r` to it, and returns a Promise
 * that resolves with the element once "loadeddata" fires. Browser only: it uses `document`.
 */
function Oq(r){let t=document.createElement("video");return"playsInline"in t&&(t.playsInline=!0),t.muted=!0,t.loop=!0,t.style.position="fixed",t.style.left="0px",t.style.top="0px",t.preload="auto",t.appendChild(r),new Promise(e=>{t.addEventListener("loadeddata",o=>e(t)),t.load()})}
/** Awaits `r.play()`, then waits for one `requestVideoFrameCallback` tick when that method exists on `r`. */
async function Mq(r){await r.play(),"requestVideoFrameCallback"in r&&await new Promise(t=>{r.requestVideoFrameCallback(t)})}
/**
 * Normal-distribution sampler.
 * constructor(mean, stdDev, dtype, truncated, seed): when `truncated` is set, accepted values are limited
 * to mean ± 2*stdDev. A falsy seed (including 0) is replaced by Math.random(). The uniform source is
 * `zd.alea(seed.toString())`.
 * nextValue(): draws two uniforms in (-1, 1), rejects pairs whose squared radius is 0 or >= 1, and scales
 * by sqrt(-2*ln(a)/a) (the polar form of Box-Muller). Each accepted pair yields two values; the second is
 * cached in `nextVal` and returned by the next call.
 */
var Hu=class{constructor(t,e,o,n,s){this.mean=t,this.stdDev=e,this.dtype=o,this.nextVal=NaN,this.truncated=n,this.truncated&&(this.upper=this.mean+this.stdDev*2,this.lower=this.mean-this.stdDev*2);let a=s||Math.random();this.random=zd.alea(a.toString())}nextValue(){if(!isNaN(this.nextVal)){let n=this.nextVal;return this.nextVal=NaN,n}let t,e,o=!1;for(;!o;){let n,s,a;do n=2*this.random()-1,s=2*this.random()-1,a=n*n+s*s;while(a>=1||a===0);let 
i=Math.sqrt(-2*Math.log(a)/a);t=this.mean+this.stdDev*n*i,e=this.mean+this.stdDev*s*i,(!this.truncated||this.isValidTruncated(t))&&(o=!0)}return(!this.truncated||this.isValidTruncated(e))&&(this.nextVal=this.convertValue(e)),this.convertValue(t)}
/* Returns the value unchanged when dtype is null/undefined or "float32"; otherwise rounds it. */
convertValue(t){return this.dtype==null||this.dtype==="float32"?t:Math.round(t)}
/* True when the value lies within [lower, upper]. */
isValidTruncated(t){return t<=this.upper&&t>=this.lower}},
/**
 * Gamma-distribution sampler.
 * constructor(alpha, beta, dtype, seed): stores 1/beta in `this.beta`, builds a uniform source `randu`
 * and a standard-normal `Hu` seeded from it, and sets d = alpha + 2/3 (alpha < 1) or alpha - 1/3, c = 1/sqrt(9d).
 * nextValue(): rejection loop over normal and uniform draws. The structure resembles the Marsaglia-Tsang
 * method; confirm against the upstream source before relying on that name.
 * convertValue(): rounds unless dtype is exactly "float32". Unlike `Hu`, a null dtype is rounded.
 */
Ld=class{constructor(t,e,o,n){this.alpha=t,this.beta=1/e,this.dtype=o;let s=n||Math.random();this.randu=zd.alea(s.toString()),this.randn=new Hu(0,1,o,!1,this.randu()),t<1?this.d=t+2/3:this.d=t-1/3,this.c=1/Math.sqrt(9*this.d)}nextValue(){let t,e,o,n,s,a;for(;;){do n=this.randn.nextValue(),a=1+this.c*n;while(a<=0);if(a*=a*a,t=n*n,e=1-.331*t*t,o=.5*t+this.d*(1-a+Math.log(a)),s=this.randu(),s<e||Math.log(s)<o)break}return a=1/this.beta*this.d*a,this.alpha<1&&(a*=Math.pow(this.randu(),1/this.alpha)),this.convertValue(a)}convertValue(t){return this.dtype==="float32"?t:Math.round(t)}},
/**
 * Uniform sampler over [min, max).
 * constructor(min = 0, max = 1, dtype, seed): a null seed becomes Math.random(); numeric seeds are stringified.
 * Throws when the dtype is not float (null or "float32") and max - min <= 1.
 * nextValue(): min + range * random(), rounded for non-float dtypes.
 */
Bd=class{constructor(t=0,e=1,o,n){if(this.canReturnFloat=()=>this.dtype==null||this.dtype==="float32",this.min=t,this.range=e-t,this.dtype=o,n==null&&(n=Math.random()),typeof n=="number"&&(n=n.toString()),!this.canReturnFloat()&&this.range<=1)throw new Error(`The difference between ${t} - ${e} <= 1 and dtype is not float`);this.random=zd.alea(n)}convertValue(t){return this.canReturnFloat()?t:Math.round(t)}nextValue(){return this.convertValue(this.min+this.range*this.random())}};
/**
 * randomGamma op.
 * @param r shape (checked by `Ct`)
 * @param t alpha
 * @param e beta, default 1 (null is also treated as 1)
 * @param o dtype, "float32" (default) or "int32"; anything else throws
 * @param n seed
 * Fills a buffer from `me(shape, dtype)` with `Ld` samples and returns `toTensor()`.
 */
function Lq(r,t,e=1,o="float32",n){if(Ct(r),e==null&&(e=1),o==null&&(o="float32"),o!=="float32"&&o!=="int32")throw new Error(`Unsupported data type ${o}`);let s=new Ld(t,e,o,n),a=me(r,o);for(let i=0;i<a.values.length;i++)a.values[i]=s.nextValue();return a.toTensor()}var x1=N({randomGamma_:Lq});
/**
 * randomNormal op.
 * @param r shape
 * @param t mean, default 0
 * @param e stdDev, default 1
 * @param o dtype; "bool" throws
 * @param n seed
 * Fills a buffer with non-truncated `Hu` samples and returns `toTensor()`.
 */
function Bq(r,t=0,e=1,o,n){if(Ct(r),o!=null&&o==="bool")throw new Error(`Unsupported data type ${o}`);let s=new Hu(t,e,o,!1,n),a=me(r,o);for(let i=0;i<a.values.length;i++)a.values[i]=s.nextValue();return a.toTensor()}var 
Vd=N({randomNormal_:Bq});
/** Standard normal: calls randomNormal with mean 0 and stdDev 1. Rejects dtype "bool". */
function zq(r,t,e){if(t!=null&&t==="bool")throw new Error(`Unsupported data type ${t}`);return Vd(r,0,1,t,e)}va
${n.shape}`);if(s.rank!==1)throw new Error(`Values should be Tensor1D but received shape ${s.shape}`);if(a.rank!==1)throw new Error(`Dense shape should be Tensor1D but received shape ${a.shape}`);if(i.rank!==0)throw new Error(`Default value should be a scalar but received shape ${i.shape}`);let p={indices:n,values:s,denseShape:a,defaultValue:i},u=T.runKernel(Hi,p);return{outputIndices:u[0],outputValues:u[1],emptyRowIndicator:u[2],reverseIndexMap:u[3]}}var PN=N({sparseFillEmptyRows_:Oj});function Mj(r,t,e){let o=v(r,"inputIndices","sparseReshape","int32"),n=v(t,"inputShape","sparseReshape","int32"),s=v(e,"newShape","sparseReshape","int32");if(o.rank!==2)throw new Error(`Input indices should be Tensor2D but received shape
${o.shape}`);if(n.rank!==1)throw new Error(`Input shape should be Tensor1D but received shape ${n.shape}`);if(s.rank!==1)throw new Error(`New shape should be Tensor1D but received shape ${s.shape}`);let a={inputIndices:o,inputShape:n,newShape:s},i=T.runKernel(ei,a);return{outputIndices:i[0],outputShape:i[1]}}var ON=N({sparseReshape_:Mj});function Lj(r,t,e){let o=v(r,"data","sparseSegmentMean"),n=v(t,"indices","sparseSegmentMean","int32"),s=v(e,"segmentIds","sparseSegmentMean","int32");if(o.rank<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.rank!==1)throw new Error(`Indices should be Tensor1D but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.rank!==1)throw new Error(`Segment ids should be Tensor1D but received shape
${s.shape}`);let a={data:o,indices:n,segmentIds:s};return T.runKernel(ya,a)}var MN=N({sparseSegmentMean_:Lj});function Bj(r,t,e){let o=v(r,"data","sparseSegmentSum"),n=v(t,"indices","sparseSegmentSum","int32"),s=v(e,"segmentIds","sparseSegmentSum","int32");if(o.rank<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.rank!==1)throw new Error(`Indices should be Tensor1D but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.rank!==1)throw new Error(`Segment ids should be Tensor1D but received shape
${s.shape}`);let a={data:o,indices:n,segmentIds:s};return T.runKernel(ba,a)}var LN=N({sparseSegmentSum_:Bj});function zj(r,t,e,o,n,s,a,i){let p=v(r,"data","stringNGrams","string");if(p.dtype!=="string")throw new Error("Data must be of datatype string");if(p.shape.length!==1)throw new Error(`Data must be a vector, saw: ${p.shape}`);let u=v(t,"dataSplits","stringNGrams");if(u.dtype!=="int32")throw new Error("Data splits must be of datatype int32");let c={separator:e,nGramWidths:o,leftPad:n,rightPad:s,padWidth:a,preserveShortSequences:i},l={data:p,dataSplits:u},m=T.runKernel(Ca,l,c);return{nGrams:m[0],nGramsSplits:m[1]}}var BN=N({stringNGrams_:zj});function Vj(r,t,e=!0){let o=v(r,"input","stringSplit","string"),n=v(t,"delimiter","stringSplit","string");if(o.rank!==1)throw new Error(`Input should be Tensor1D but received shape ${o.shape}`);if(n.rank!==0)throw new Error(`Delimiter should be a scalar but received shape ${n.shape}`);let s={skipEmpty:e},a={input:o,delimiter:n},i=T.runKernel(qi,a,s);return{indices:i[0],values:i[1],shape:i[2]}}var zN=N({stringSplit_:Vj});function Wj(r,t){let e=v(r,"input","stringToHashBucketFast","string"),o={numBuckets:t};if(t<=0)throw new Error("Number of buckets must be at least 1");let n={input:e};return T.runKernel(ji,n,o)}var VN=N({stringToHashBucketFast_:Wj});function Uj(r,t,e,o=!0){let n=v(r,"input","staticRegexReplace","string"),s={pattern:t,rewrite:e,replaceGlobal:o};return T.runKernel($u,{x:n},s)}var WN=N({staticRegexReplace_:Uj});var 
Gj={fft:ac,ifft:Ku,rfft:ic,irfft:Gd},Hj={hammingWindow:nN,hannWindow:Yd,frame:Qd,stft:sN},Kj={flipLeftRight:iN,grayscaleToRGB:uN,resizeNearestNeighbor:CN,resizeBilinear:bN,rgbToGrayscale:pN,rotateWithOffset:cN,cropAndResize:aN,nonMaxSuppression:lN,nonMaxSuppressionAsync:fN,nonMaxSuppressionWithScore:hN,nonMaxSuppressionWithScoreAsync:gN,nonMaxSuppressionPadded:xN,nonMaxSuppressionPaddedAsync:yN,threshold:wN,transform:SN},qj={bandPart:IN,gramSchmidt:vN,qr:NN},jj={absoluteDifference:TN,computeWeightedLoss:cr,cosineDistance:_N,hingeLoss:$N,huberLoss:EN,logLoss:RN,meanSquaredError:DN,sigmoidCrossEntropy:AN,softmaxCrossEntropy:FN},Xj={sparseFillEmptyRows:PN,sparseReshape:ON,sparseSegmentMean:MN,sparseSegmentSum:LN},Yj={stringNGrams:BN,stringSplit:zN,stringToHashBucketFast:VN,staticRegexReplace:WN};var UN={};qe(UN,{Serializable:()=>$l,SerializationMap:()=>tf,getRegisteredName:()=>Zj,registerClass:()=>tS});var Qj=new Map,eS=new Map,$l=class{getClassName(){return this.constructor.className}static fromConfig(t,e){return new t(e)}},tf=class r{constructor(){this.classNameMap={}}static getMap(){return r.instance==null&&(r.instance=new r),r.instance}static register(t){r.getMap().classNameMap[t.className]=[t,t.fromConfig]}};function tS(r,t,e){$(r.className!=null,()=>"Class being registered does not have the static className property defined."),$(typeof r.className=="string",()=>"className is required to be a string, but got type "+typeof r.className),$(r.className.length>0,()=>"Class being registered has an empty-string as its className, which is disallowed."),typeof t=="undefined"&&(t="Custom"),typeof e=="undefined"&&(e=r.className);let o=e,n=t+">"+o;return tf.register(r),Qj.set(n,r),eS.set(r,n),r}function Zj(r){return eS.has(r)?eS.get(r):r.className}var kr=class extends $l{minimize(t,e=!1,o){let{value:n,grads:s}=this.computeGradients(t,o);if(o!=null){let a=o.map(i=>({name:i.name,tensor:s[i.name]}));this.applyGradients(a)}else this.applyGradients(s);return 
Ot(s),e?n:(n.dispose(),null)}get iterations(){return this.iterations_==null&&(this.iterations_=0),this.iterations_}incrementIterations(){this.iterations_=this.iterations+1}computeGradients(t,e){return zw(t,e)}dispose(){this.iterations_!=null&&Ot(this.iterations_)}async saveIterations(){return this.iterations_==null&&(this.iterations_=0),{name:"iter",tensor:ke(this.iterations_,"int32")}}async getWeights(){throw new Error("getWeights() is not implemented for this optimizer yet.")}async setWeights(t){throw new Error(`setWeights() is not implemented for this optimizer class ${this.getClassName()}`)}async extrac
Manifest JSON has weights with names: ${i.join(", ")}.`)}let p=n.reduce((d,f,h)=>(f&&d.push(h),d),[]),u=[];p.forEach(d=>{t[d].paths.forEach(f=>{let h=e+(e.endsWith("/")?"":"/")+f;u.push(h)})});let c=await r(u),l={},m=0;return p.forEach(d=>{let f=t[d].paths.length,h=new ir(c.slice(m,m+f));s[d].forEach(x=>{let b=h.slice(x.groupOffset,x.groupOffset+x.sizeBytes),C=nd(b,[x.manifestEntry]);for(let S in C)l[S]=C[S]}),m+=f}),l}}var sX="application/octet-stream",aX="application/json",El=class{constructor(t,e){if(this.DEFAULT_METHOD="POST",e==null&&(e={}),this.weightPathPrefix=e.weightPathPrefix,this.onProgress=e.onProgress,this.weightUrlConverter=e.weightUrlConverter,e.fetchFunc!=null?($(typeof e.fetchFunc=="function",()=>"Must pass a function that matches the signature of `fetch` (see https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API)"),this.fetch=e.fetchFunc):this.fetch=A().platform.fetch,$(t!=null&&t.length>0,()=>"URL path for http must not be null, undefined or empty."),Array.isArray(t)&&$(t.length===2,()=>`URL paths for http must have a length of 2, (actual length is ${t.length}).`),this.path=t,e.requestInit!=null&&e.requestInit.body!=null)throw new Error("requestInit is expected to have no pre-existing body, but has one.");this.requestInit=e.requestInit||{}}async save(t){if(t.modelTopology instanceof ArrayBuffer)throw new Error("BrowserHTTPRequest.save() does not support saving model topology in binary formats yet.");let e=Object.assign({method:this.DEFAULT_METHOD},this.requestInit);e.body=new FormData;let o=[{paths:["./model.weights.bin"],weights:t.weightSpecs}],n=sd(t,o);if(e.body.append("model.json",new Blob([JSON.stringify(n)],{type:aX}),"model.json"),t.weightData!=null){let a=ir.join(t.weightData);e.body.append("model.weights.bin",new Blob([a],{type:sX}),"model.weights.bin")}let s=await this.fetch(this.path,e);if(s.ok)return{modelArtifactsInfo:va(t),responses:[s]};throw new Error(`BrowserHTTPRequest.save() failed due to HTTP response status 
${s.status}.`)}async load(){let t=await this.fetch(this.path,this.requestInit);if(!t.ok)throw new Error(`Request to ${this.path} failed with status code ${t.status}. Please verify this URL points to the model JSON of the model to load.`);let e;try{e=await t.json()}catch(s){let a=`Failed to parse model JSON of response from ${this.path}.`;throw this.path.endsWith(".pb")?a+=" Your path contains a .pb file extension. Support for .pb models have been removed in TensorFlow.js 1.0 in favor of .json models. You can re-convert your Python TensorFlow model using the TensorFlow.js 1.0 conversion scripts or you can convert your.pb models with the 'pb2json'NPM script in the tensorflow/tfjs-converter repository.":a+=" Please make sure the server is serving valid JSON for this request.",new Error(a)}let o=e.modelTopology,n=e.weightsManifest;if(o==null&&n==null)throw new Error(`The JSON from HTTP path ${this.path} contains neither model topology or manifest for weights.`);return Jp(e,s=>this.loadWeights(s))}async loadWeights(t){let e=Array.isArray(this.path)?this.path[1]:this.path,[o,n]=iX(e),s=this.weightPathPrefix||o,a=ad(t),i=[],p=[];for(let c of t)for(let l of c.paths)this.weightUrlConverter!=null?p.push(this.weightUrlConverter(l)):i.push(s+l+n);this.weightUrlConverter&&i.push(...await Promise.all(p));let u=await nS(i,{requestInit:this.requestInit,fetchFunc:this.fetch,onProgress:this.onProgress});return[a,u]}};El.URL_SCHEME_REGEX=/^https?:\/\//;function iX(r){let t=r.lastIndexOf("/"),e=r.lastIndexOf("?"),o=r.substring(0,t),n=e>t?r.substring(e):"";return[o+"/",n]}function rf(r){return r.match(El.URL_SCHEME_REGEX)!=null}var jN=(r,t)=>{if(typeof fetch=="undefined"&&(t==null||t.fetchFunc==null))return null;{let e=!0;if(Array.isArray(r)?e=r.every(o=>rf(o)):e=rf(r),e)return of(r,t)}return null};qt.registerSaveRouter(jN);qt.registerLoadRouter(jN);function of(r,t){return new El(r,t)}function XN(r,t){return of(r,t)}var Rl=class{constructor(t){this.modelArtifacts=t}load(){return 
this.modelArtifacts}},nf=class{constructor(t){this.saveHandler=t}save(t){return this.saveHandler(t)}},aS=class{const
2023-08-05 15:03:11 +02:00
indices.shape[0] = ${r}`}function f5(r,t){return`indices(${r}, 0) is invalid: ${t} < 0`}function h5(r,t,e){return`indices(${r}, 0) is invalid: ${t} >= ${e}`}function g5(r,t){return`only one output dimension may be -1, not both ${r} and ${t}`}function x5(r,t){return`size ${r} must be non-negative, not ${t}`}function y5(){return"reshape cannot infer the missing input size for an empty tensor unless all specified input sizes are non-zero"}function b5(r,t){let e=He(r),o=He(t);return`Input to reshape is a SparseTensor with ${e}
dense values, but the requested shape requires a multiple of ${o}. inputShape=${r} outputShape= ${t}`}function C5(r,t){let e=He(r),o=He(t);return`Input to reshape is a tensor with ${e} dense values, but the requested shape has ${o}. inputShape=${r} outputShape=${t}`}function w5(){return"segment ids must be >= 0"}function S5(){return"segment ids are not increasing"}function I5(r,t){return`Segment id ${r} out of range [0, ${t}), possibly because segmentIds input is not sorted.`}function v5(r,t,e){return`Bad: indices[${r}] == ${t} out of range [0, ${e})`}var lS={};qe(lS,{collectGatherOpShapeInfo:()=>T5,computeOutShape:()=>N5,segOpComputeOptimalWindowSize:()=>k5});function k5(r,t){let e=!1,o;for(r<=af?(o=r,e=!0):o=Vp(r,Math.floor(Math.sqrt(r)));!e;)o>t||o===r?e=!0:o=Vp(r,o+1);return o}function N5(r,t,e){let o=[],n=r.length;for(let s=0;s<n;s++)s!==t?o.push(r[s]):o.push(e);return o}function T5(r,t,e,o){let n=t.shape.length,s=r.shape.length;if(o!==0&&(o<-n||o>n))throw new Error(`Expect batchDims in the range of [-${n}, ${n}], but got ${o}`);if(o<0&&(o+=n),o>s)throw new Error(`batchDims (${o}) must be less than rank(x) (
${s}).`);if(e<o)throw new Error(`batchDims (${o}) must be less than or equal to axis (${e}).`);for(let l=0;l<o;++l)if(r.shape[l]!==t.shape[l])throw new Error(`x.shape[${l}]: ${r.shape[l]} should be equal to indices.shape[${l}]: ${t.shape[l]}.`);let a=r.shape[e],i=[],p=1,u=1,c=1;for(let l=0;l<o;++l)i.push(r.shape[l]),p*=r.shape[l];for(let l=o;l<e;l++)i.push(r.shape[l]),u*=r.shape[l];for(let l=o;l<n;l++)i.push(t.shape[l]);for(let l=e+1;l<s;l++)i.push(r.shape[l]),c*=r.shape[l];return{batchSize:p,sliceSize:c,outerSize:u,dimSize:a,outputShape:i}}function _5(r){try{return r.map(t=>Qp(t))}catch(t){throw new Error(`Failed to decode encoded string bytes into utf-8, error: ${t}`)}}function $5(r){return r.map(t=>Zi(t))}var Vt={};qe(Vt,{nonMaxSuppressionV3Impl:()=>Zd,nonMaxSuppressionV4Impl:()=>Jd,nonMaxSuppressionV5Impl:()=>ef,whereImpl:()=>jd});GN();var E5=A();E5.registerFlag("KEEP_INTERMEDIATE_TENSORS",()=>!1,r=>{r&&console.warn("Keep intermediate tensors is ON. This will print the values of all intermediate tensors during model inference. Not all models support this mode. For details, check e2e/benchmarks/ model_config.js. 
This significantly impacts performance.")});var fo;(function(r){r[r.DT_INVALID=0]="DT_INVALID",r[r.DT_FLOAT=1]="DT_FLOAT",r[r.DT_DOUBLE=2]="DT_DOUBLE",r[r.DT_INT32=3]="DT_INT32",r[r.DT_UINT8=4]="DT_UINT8",r[r.DT_INT16=5]="DT_INT16",r[r.DT_INT8=6]="DT_INT8",r[r.DT_STRING=7]="DT_STRING",r[r.DT_COMPLEX64=8]="DT_COMPLEX64",r[r.DT_INT64=9]="DT_INT64",r[r.DT_BOOL=10]="DT_BOOL",r[r.DT_QINT8=11]="DT_QINT8",r[r.DT_QUINT8=12]="DT_QUINT8",r[r.DT_QINT32=13]="DT_QINT32",r[r.DT_BFLOAT16=14]="DT_BFLOAT16",r[r.DT_QINT16=15]="DT_QINT16",r[r.DT_QUINT16=16]="DT_QUINT16",r[r.DT_UINT16=17]="DT_UINT16",r[r.DT_COMPLEX128=18]="DT_COMPLEX128",r[r.DT_HALF=19]="DT_HALF",r[r.DT_RESOURCE=20]="DT_RESOURCE",r[r.DT_VARIANT=21]="DT_VARIANT",r[r.DT_UINT32=22]="DT_UINT32",r[r.DT_UINT64=23]="DT_UINT64",r[r.DT_FLOAT_REF=101]="DT_FLOAT_REF",r[r.DT_DOUBLE_REF=102]="DT_DOUBLE_REF",r[r.DT_INT32_REF=103]="DT_INT32_REF",r[r.DT_UINT8_REF=104]="DT_UINT8_REF",r[r.DT_INT16_REF=105]="DT_INT16_REF",r[r.DT_INT8_REF=106]="DT_INT8_REF",r[r.DT_STRING_REF=107]="DT_STRING_REF",r[r.DT_COMPLEX64_REF=108]="DT_COMPLEX64_REF",r[r.DT_INT64_REF=109]="DT_INT64_REF",r[r.DT_BOOL_REF=110]="DT_BOOL_REF",r[r.DT_QINT8_REF=111]="DT_QINT8_REF",r[r.DT_QUINT8_REF=112]="DT_QUINT8_REF",r[r.DT_QINT32_REF=113]="DT_QINT32_REF",r[r.DT_BFLOAT16_REF=114]="DT_BFLOAT16_REF",r[r.DT_QINT16_REF=115]="DT_QINT16_REF",r[r.DT_QUINT16_REF=116]="DT_QUINT16_REF",r[r.DT_UINT16_REF=117]="DT_UINT16_REF",r[r.DT_COMPLEX128_REF=118]="DT_COMPLEX128_REF",r[r.DT_HALF_REF=119]="DT_HALF_REF",r[r.DT_RESOURCE_REF=120]="DT_RESOURCE_REF",r[r.DT_VARIANT_REF=121]="DT_VARIANT_REF",r[r.DT_UINT32_REF=122]="DT_UINT32_REF",r[r.DT_UINT64_REF=123]="DT_UINT64_REF"})(fo||(fo={}));var xT;(function(r){let t;(function(e){e[e.LEGACY=0]="LEGACY",e[e.V1=1]="V1",e[e.V2=2]="V2"})(t=r.CheckpointFormatVersion||(r.CheckpointFormatVersion={}))})(xT||(xT={}));var dS={};function D5(r,t){let e={tfOpName:r,category:"custom",inputs:[],attrs:[],customExecutor:t};dS[r]=e}function uf(r){return 
dS[r]}function A5(r){delete dS[r]}function I(r,t,e,o,n){let s=t.inputParams[r];if(s&&s.inputIndexStart!==void 0){let i=s.inputIndexStart,p=s.inputIndexEnd===0?void 0:s.inputIndexEnd===void 0?i+1:s.inputIndexEnd,u=i<0?t.inputNames.length+i:i;if(s.type==="tensor")return Bt(t.inputNames[u],e,o,n);if(s.type==="tensors"){let m=t.inputs.slice(i,p);return t.inputNames.slice(i,p).filter((f,h)=>{var g;return((g=m[h])===null||g===void 0?void 0:g.op)!=="NoOp"}).map(f=>Bt(f,e,o,n))}let c=Bt(t.inputNames[u],e,o,n),l=c.dataSync();return s.type==="number"?l[0]:y.toNestedArray(c.shape,l)}let a=t.attrParams[r];return a&&a.value}function Bt(r,t,e,o){let[n,s]=Nr(r,e);if(o!=null){let i=o.getHashTableHandleByName(n);if(i!=null)return i}let a=e.currentContextIds.find(i=>!!t[pf(n,i)]);return a!==void 0?t[pf(n,a)][s]:void 0}function fS(r,t,e){return t[pf(r,e.currentContextId)]}function Ls(r,t){let[e,o,n]=Nr(r,t);return[pf(e,t&&t.currentContextId),o,n]}function pf(r,t){return
because the value dtype is ${e.dtype}, but TensorArray dtype is ${this.dtype}.`);if(this.size()===0&&(this.elementShape==null||this.elementShape.length===0)&&(this.elementShape=e.shape),Gr(this.elementShape,e.shape,`TensorArray ${this.name}: Could not write to TensorArray index ${t}.`),o.read)throw new Error(`TensorArray ${this.name}: Could not write to TensorArray index ${t}, because it has already been read.`);if(o.written)throw new Error(`TensorArray ${this.name}: Could not write to TensorArray index ${t}, because it has already been written.`);o.tensor=e,Er(e),o.written=!0,this.tensors[t]=o}writeMany(t,e){if(t.length!==e.length)throw new Error(`TensorArray ${this.name}: could not write multiple tensors,because the index size: ${t.length} is not the same as tensors size: ${e.length}.`);t.forEach((o,n)=>this.write(o,e[n]))}gather(t,e){if(e&&e!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but gather requested dtype ${e}`);if(t)t=t.slice(0,this.size());else{t=[];for(let n=0;n<this.size();n++)t.push(n)}if(t.length===0)return ar([],[0].concat(this.elementShape));let o=this.readMany(t);return Gr(this.elementShape,o[0].shape,"TensorArray shape mismatch: "),vr(o,0)}concat(t){if(t&&t!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but concat requested dtype ${t}`);if(this.size()===0)return ar([],[0].concat(this.elementShape));let e=[];for(let n=0;n<this.size();n++)e.push(n);let o=this.readMany(e);return Gr(this.elementShape,o[0].shape,`TensorArray shape mismatch: tensor array shape (${this.elementShape}) vs first tensor shape (${o[0].shape})`),yt(o,0)}scatter(t,e){if(e.dtype!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but tensor has dtype ${e.dtype}`);if(t.length!==e.shape[0])throw new Error(`Expected len(indices) == tensor.shape[0], but saw: ${t.length} vs. ${e.shape[0]}`);let o=Math.max(...t);if(!this.dynamicSize&&o>=this.maxSize)throw new Error(`Max index must be < array size (${o} vs. 
${this.maxSize})`);this.writeMany(t,mo(e,0))}split(t,e){if(e.dtype!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but tensor has dtype ${e.dtype}`);let o=0,n=t.map(p=>(o+=p,o));if(o!==e.shape[0])throw new Error(`Expected sum of lengths to be equal to
2022-11-18 17:13:29 +01:00
tensor.shape[0], but sum of lengths is
${o}, and tensor's shape is: ${e.shape}`);if(!this.dynamicSize&&t.length!==this.maxSize)throw new Error(`TensorArray's size is not equal to the size of lengths (${this.maxSize} vs. ${t.length}), and the TensorArray is not marked as dynamically resizeable`);let s=o===0?0:e.size/o,a=[];De(()=>{e=W(e,[1,o,s]);for(let p=0;p<t.length;++p){let c=[0,p===0?0:n[p-1],0],l=[1,t[p],s];a[p]=W(Xe(e,c,l),this.elementShape)}return a});let i=[];for(let p=0;p<t.length;p++)i[p]=p;this.writeMany(i,a)}};var dc=class r{get id(){return this.idTensor.id}constructor(t,e,o,n=-1){this.tensors=t,this.elementShape=e,this.elementDtype=o,t!=null&&t.forEach(s=>{if(o!==s.dtype)throw new Error(`Invalid data types; op elements ${o}, but list elements ${s.dtype}`);Gr(e,s.shape,"TensorList shape mismatch: "),Er(s)}),this.idTensor=ke(0),this.maxNumElements=n,Er(this.idTensor)}copy(){return new r([...this.tensors],this.elementShape,this.elementDtype)}clearAndClose(t){this.tensors.forEach(e=>{(t==null||!t.has(e.id))&&e.dispose()}),this.tensors.length=0,this.idTensor.dispose()}size(){return this.tensors.length}stack(t,e,o=-1){if(e!==this.elementDtype)throw new Error(`Invalid data types; op elements ${e}, but list elements ${this.elementDtype}`);if(o!==-1&&this.tensors.length!==o)throw new Error(`Operation expected a list with ${o} elements but got a list with ${this.tensors.length} elements.`);Gr(t,this.elementShape,"TensorList shape mismatch: ");let n=mc(this.elementShape,this.tensors,t);return De(()=>{let s=this.tensors.map(a=>W(a,n));return vr(s,0)})}popBack(t,e){if(e!==this.elementDtype)throw new Error(`Invalid data types; op elements ${e}, but list elements ${this.elementDtype}`);if(this.size()===0)throw new Error("Trying to pop from an empty list.");let o=mc(this.elementShape,this.tensors,t),n=this.tensors.pop();return n.kept=!1,Gr(n.shape,t,"TensorList shape mismatch: "),W(n,o)}pushBack(t){if(t.dtype!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t.dtype}, but list elements 
${this.elementDtype}`);if(Gr(t.shape,this.elementShape,"TensorList shape mismatch: "),this.maxNumElements===this.size())throw new Error("Trying to push element into a full list.");Er(t),this.tensors.push(t)}resize(t){if(t<0)throw new Error(`TensorListResize expects size to be non-negative. Got: ${t}`);if(this.maxNumElements!==-1&&t>this.maxNumElements)throw new Error(`TensorListResize input size ${t} is greater maxNumElement ${this.maxNumElements}.`);let e=new r([],this.elementShape,this.elementDtype,this.maxNumElements);e.tensors.length=t;for(let o=0;o<Math.min(this.tensors.length,t);++o)e.tensors[o]=this.tensors[o];return e}getItem(t,e,o){if(o!==this.elementDtype)throw new Error(`Invalid data types; op elements ${o}, but list elements ${this.elementDtype}`);if(t<0||t>this.tensors.length)throw new Error(`Trying to access element ${t} in a list with ${this.tensors.length} elements.`);if(this.tensors[t]==null)throw new Error(`element at index ${t} is null.`);Gr(this.tensors[t].shape,e,"TensorList shape mismatch: ");let n=mc(this.elementShape,this.tensors,e);return W(this.tensors[t],n)}setItem(t,e){if(e.dtype!==this.elementDtype)throw new Error(`Invalid data types; op elements ${e.dtype}, but list elements ${this.elementDtype}`);if(t<0||this.maxNumElements!==-1&&t>=this.maxNumElements)throw new Error(`Trying to set element ${t} in a list with max ${this.maxNumElements} elements.`);Gr(this.elementShape,e.shape,"TensorList shape mismatch: "),Er(e),this.tensors[t]!=null&&(this.tensors[t].kept=!1),this.tensors[t]=e}gather(t,e,o){if(e!==this.elementDtype)throw new Error(`Invalid data types; op elements ${e}, but list elements ${this.elementDtype}`);Gr(this.elementShape,o,"TensorList shape mismatch: "),t=t.slice(0,this.size());let n=mc(this.elementShape,this.tensors,o);return t.length===0?ar([],[0].concat(n)):De(()=>{let s=t.map(a=>W(this.tensors[a],n));return vr(s,0)})}concat(t,e){if(t&&t!==this.elementDtype)throw new Error(`TensorList dtype is ${this.elementDtype} but 
concat requested dtype ${t}`);Gr(this.elementShape,e,"TensorList shape mismatch: ");let o=m
2022-11-18 17:13:29 +01:00
tensor.shape[0], but sum of lengths is
${o}, and tensor's shape is: ${r.shape}`);let s=r.shape.slice(1),a=wf(s,e),i=o===0?0:r.size/o,p=De(()=>{let c=[];r=W(r,[1,o,i]);for(let l=0;l<t.length;++l){let d=[0,l===0?0:n[l-1],0],f=[1,t[l],i];c[l]=W(Xe(r,d,f),a)}return r.dispose(),c}),u=new dc([],e,r.dtype,t.length);for(let c=0;c<p.length;c++)u.setItem(c,p[c]);return u}var _T=async(r,t,e)=>{switch(r.op){case"If":case"StatelessIf":{let o=I("thenBranch",r,t,e),n=I("elseBranch",r,t,e),s=I("cond",r,t,e),a=I("args",r,t,e);return(await s.data())[0]?e.functionMap[o].executeFunctionAsync(a,e.tensorArrayMap,e.tensorListMap):e.functionMap[n].executeFunctionAsync(a,e.tensorArrayMap,e.tensorListMap)}case"While":case"StatelessWhile":{let o=I("body",r,t,e),n=I("cond",r,t,e),s=I("args",r,t,e),a=await e.functionMap[n].executeFunctionAsync(s,e.tensorArrayMap,e.tensorListMap),i=s.map(c=>c.id),p=await a[0].data();a.forEach(c=>{!c.kept&&i.indexOf(c.id)===-1&&c.dispose()});let u=s;for(;p[0];){let c=u;u=await e.functionMap[o].executeFunctionAsync(u,e.tensorArrayMap,e.tensorListMap);let l=u.map(d=>d.id);c.forEach(d=>{!d.kept&&i.indexOf(d.id)===-1&&l.indexOf(d.id)===-1&&d.dispose()});let m=await e.functionMap[n].executeFunctionAsync(u,e.tensorArrayMap,e.tensorListMap);p=await m[0].data(),m.forEach(d=>{!d.kept&&i.indexOf(d.id)===-1&&l.indexOf(d.id)===-1&&d.dispose()})}return u}case"LoopCond":{let o=I("pred",r,t,e);return[Bs(o)]}case"Switch":{let o=I("pred",r,t,e),n=I("data",r,t,e);return n.kept||(n=Bs(n)),(await o.data())[0]?[void 0,n]:[n,void 0]}case"Merge":{let o=r.inputNames.find(n=>Bt(n,t,e)!==void 0);if(o){let n=Bt(o,t,e);return[Bs(n)]}return}case"Enter":{let o=I("frameName",r,t,e),n=I("tensor",r,t,e);return e.enterFrame(o),[Bs(n)]}case"Exit":{let o=I("tensor",r,t,e);return e.exitFrame(),[Bs(o)]}case"NextIteration":{let o=I("tensor",r,t,e);return e.nextIteration(),[Bs(o)]}case"TensorArrayV3":{let 
o=I("size",r,t,e),n=I("dtype",r,t,e),s=I("elementShape",r,t,e),a=I("dynamicSize",r,t,e),i=I("clearAfterRead",r,t,e),p=I("identicalElementShapes",r,t,e),u=I("name",r,t,e),c=new Sf(u,n,o,s,p,a,i);return e.addTensorArray(c),[c.idTensor,ke(1)]}case"TensorArrayWriteV3":{let o=I("tensorArrayId",r,t,e),n=I("index",r,t,e),s=I("tensor",r,t,e),a=e.getTensorArray(o.id);return a.write(n,s),[a.idTensor]}case"TensorArrayReadV3":{let o=I("tensorArrayId",r,t,e),n=I("index",r,t,e);return[e.getTensorArray(o.id).read(n)]}case"TensorArrayGatherV3":{let o=I("tensorArrayId",r,t,e),n=I("indices",r,t,e),s=I("dtype",r,t,e);return[e.getTensorArray(o.id).gather(n,s)]}case"TensorArrayScatterV3":{let o=I("tensorArrayId",r,t,e),n=I("indices",r,t,e),s=I("tensor",r,t,e),a=e.getTensorArray(o.id);return a.scatter(n,s),[a.idTensor]}case"TensorArrayConcatV3":{let o=I("tensorArrayId",r,t,e),n=e.getTensorArray(o.id),s=I("dtype",r,t,e);return[n.concat(s)]}case"TensorArraySplitV3":{let o=I("tensorArrayId",r,t,e),n=I("tensor",r,t,e),s=I("lengths",r,t,e),a=e.getTensorArray(o.id);return a.split(s,n),[a.idTensor]}case"TensorArraySizeV3":{let o=I("tensorArrayId",r,t,e),n=e.getTensorArray(o.id);return[ke(n.size(),"int32")]}case"TensorArrayCloseV3":{let o=I("tensorArrayId",r,t,e),n=e.getTensorArray(o.id);return n.clearAndClose(),[n.idTensor]}case"TensorListSetItem":{let o=I("tensorListId",r,t,e),n=I("index",r,t,e),s=I("tensor",r,t,e),a=e.getTensorList(o.id);return a.setItem(n,s),[a.idTensor]}case"TensorListGetItem":{let o=I("tensorListId",r,t,e),n=I("index",r,t,e),s=I("elementShape",r,t,e),a=I("elementDType",r,t,e);return[e.getTensorList(o.id).getItem(n,s,a)]}case"TensorListScatterV2":case"TensorListScatter":{let o=I("indices",r,t,e),n=I("tensor",r,t,e),s=I("elementShape",r,t,e),a=I("numElements",r,t,e),i=NT(n,o,s,a);return e.addTensorList(i),[i.idTensor]}case"TensorListReserve":case"EmptyTensorList":{let 
o=I("elementShape",r,t,e),n=I("elementDType",r,t,e),s;r.op==="TensorListReserve"?s="numElements":s="maxNumElements";let a=I(s,r,t,e),i=r.op==="TensorListReserve"?-1:a,p=kT(o,n,a,i);return e.addTensorList(p),[p.idTensor]}case"TensorListGather":{let o=I("tensorListId",r,t,e),n=I("indices",r,t,e),s=I("elementShape",r,t,e),a=I("elemen
2022-11-18 17:13:29 +01:00
============================
Hi, looks like you are running TensorFlow.js in Node.js. To speed things up dramatically, install our node backend, visit https://github.com/tensorflow/tfjs-node for more details.
============================`));let n={id:this.nextDataId()};return this.data.set(n,{values:t,dtype:o,refCount:1}),n}makeTensorInfo(t,e,o){let n;if(e==="string"&&o!=null&&o.length>0&&y.isString(o[0])){let s=o.map(a=>y.encodeString(a));n=this.write(s,t,e)}else n=this.write(o,t,e);return{dataId:n,shape:t,dtype:e}}refCount(t){return this.data.has(t)?this.data.get(t).refCount:0}incRef(t){let e=this.data.get(t);e.refCount++}decRef(t){if(this.data.has(t)){let e=this.data.get(t);e.refCount--}}move(t,e,o,n,s){this.data.set(t,{values:e,dtype:n,refCount:s})}numDataIds(){return this.data.numDataIds()}async read(t){return this.readSync(t)}readSync(t){let{dtype:e,complexTensorInfos:o}=this.data.get(t);if(e==="complex64"){let n=this.readSync(o.real.dataId),s=this.readSync(o.imag.dataId);return w.mergeRealAndImagArrays(n,s)}return y.convertBackendValuesAndArrayBuffer(this.data.get(t).values,e)}bufferSync(t){let e=this.readSync(t.dataId);if(t.dtype==="string")try{let o=e.map(n=>y.decodeString(n));return me(t.shape,t.dtype,o)}catch(o){throw new Error("Failed to decode encoded string bytes into utf-8")}return me(t.shape,t.dtype,e)}makeOutput(t,e,o){return ur().makeTensorFromTensorInfo(this.makeTensorInfo(e,o,t),this)}disposeData(t,e=!1){if(this.data.has(t)){if(this.data.get(t).refCount--,!e&&this.data.get(t).refCount>0)return!1;let{complexTensorInfos:o}=this.data.get(t);o!=null&&(this.disposeData(o.real.dataId,!0),this.disposeData(o.imag.dataId,!0)),this.data.delete(t)}return!0}disposeIntermediateTensorInfo(t){this.disposeData(t.dataId)}async time(t){let e=y.now();return t(),{kernelMs:y.now()-e}}memory(){return{unreliable:!0,reasons:["The reported memory is an upper bound. 
Due to automatic garbage collection, the true allocated memory may be less."]}}where(t){Q([t],"where");let e=this.readSync(t.dataId);return A8(t.shape,e)}dispose(){}floatPrecision(){return 32}epsilon(){return super.epsilon()}};hc.nextDataId=0;var wc={};qe(wc,{addImpl:()=>zS,bincountImpl:()=>yc,bincountReduceImpl:()=>kf,bitwiseAndImpl:()=>VS,castImpl:()=>BS,ceilImpl:()=>WS,concatImpl:()=>np,equalImpl:()=>US,expImpl:()=>HS,expm1Impl:()=>qS,floorDivImpl:()=>XS,floorImpl:()=>jS,gatherNdImpl:()=>Nf,gatherV2Impl:()=>Tf,greaterEqualImpl:()=>QS,greaterImpl:()=>YS,lessEqualImpl:()=>JS,lessImpl:()=>ZS,linSpaceImpl:()=>_f,logImpl:()=>eI,maxImpl:()=>$f,maximumImpl:()=>tI,minimumImpl:()=>rI,multiplyImpl:()=>Ll,negImpl:()=>oI,notEqualImpl:()=>nI,prodImpl:()=>sI,raggedGatherImpl:()=>Ef,raggedRangeImpl:()=>Rf,raggedTensorToTensorImpl:()=>Df,rangeImpl:()=>ap,rsqrtImpl:()=>iI,scatterImpl:()=>zs,sigmoidImpl:()=>k_,simpleAbsImpl:()=>LS,sliceImpl:()=>ip,sparseFillEmptyRowsImpl:()=>Af,sparseReshapeImpl:()=>Ff,sparseSegmentReductionImpl:()=>Cc,sqrtImpl:()=>__,squaredDifferenceImpl:()=>pI,staticRegexReplaceImpl:()=>cI,stridedSliceImpl:()=>Pf,stringNGramsImpl:()=>up,stringSplitImpl:()=>pp,stringToHashBucketFastImpl:()=>cp,subImpl:()=>mI,tileImpl:()=>Of,topKImpl:()=>Mf,transposeImpl:()=>bc,uniqueImpl:()=>lp});function LS(r){let t=new Float32Array(r.length);for(let e=0;e<r.length;++e)t[e]=Math.abs(r[e]);return t}var F8=r=>{let{x:t}=r.inputs,e=r.backend;Q(t,"abs");let o=new Float32Array(y.sizeFromShape(t.shape)),n=e.data.get(t.dataId).values;return o=LS(n),e.makeOutput(o,t.shape,t.dtype)},XT={kernelName:Xs,backendName:"cpu",kernelFunc:F8};function ze(r){return(t,e,o,n,s)=>{let a=w.assertAndGetBroadcastShape(t,e),i=a.length,p=y.computeStrides(a),u=y.sizeFromShape(a),c=y.getTypedArrayFromDType(s,u),l=t.length,m=e.length,d=y.computeStrides(t),f=y.computeStrides(e),h=w.getBroadcastDims(t,a),g=w.getBroadcastDims(e,a);if(h.length+g.length===0)for(let 
x=0;x<c.length;++x)c[x]=r(o[x%o.length],n[x%n.length]);else for(let x=0;x<c.length;++x){let b=y.indexToLoc(x,i,p),C=b.slice(-l);h.forEach(E=>C[E]=0);let S=y.locToIndex(C,l,d),k=b.slice(-m);g.forEach(E=>k[E]=0);let _=y.locToIndex(k,m,f);c[x]=r(o[S],n[_])}return[c,a]}}function Ht(r){let{inputs:t,backend:e}=r,{real:o,imag:n}=t,s=e.data.get(o.dataId).values,a=e.data.get(n.dataId).values,i=e.makeTensorInfo(o.shape,"complex64"),p=e.data.get(i.dataId);return p.co
2022-11-18 17:13:29 +01:00
${s.shape}`);if(o.shape.length!==2)throw new Error(`Indices must be a matrix, saw:
${o.shape}`);if(n.shape.length!==1)throw new Error(`Values must be a vector, saw:
${n.shape}`);if(a.shape.length!==0)throw new Error(`Default value must be a scalar, saw:
${a.shape}`);let i=e.data.get(o.dataId).values,p=e.data.get(n.dataId).values,u=e.data.get(s.dataId).values,c=e.data.get(a.dataId).values[0],[l,m,d,f,h]=Af(i,o.shape,o.dtype,p,n.dtype,u,c);return[e.makeTensorInfo(m,o.dtype,l),e.makeTensorInfo([m[0]],n.dtype,d),e.makeTensorInfo([f.length],"bool",new Uint8Array(f.map(g=>Number(g)))),e.makeTensorInfo([h.length],o.dtype,new Int32Array(h))]}var XE={kernelName:Hi,backendName:"cpu",kernelFunc:w7};function S7(r){let{inputs:t,backend:e}=r,{inputIndices:o,inputShape:n,newShape:s}=t;if(o.shape.length!==2)throw new Error(`Input indices should be a matrix but received shape
2022-11-18 17:13:29 +01:00
${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=Array.from(e.data.get(n.dataId).values),i=e.data.get(o.dataId).values,p=Array.from(e.data.get(s.dataId).values),[u,c,l]=Ff(i,o.shape,o.dtype,a,p);return[e.makeTensorInfo(c,o.dtype,u),e.makeTensorInfo([l.length],s.dtype,new Int32Array(l))]}var YE={kernelName:ei,backendName:"cpu",kernelFunc:S7};function I7(r){let{inputs:t,backend:e}=r,{data:o,indices:n,segmentIds:s}=t;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);if(n.shape[0]!==s.shape[0])throw new Error("segmentIds and indices should have same size.");let a=e.data.get(o.dataId).values,i=e.data.get(n.dataId).values,p=e.data.get(s.dataId).values,[u,c]=Cc(a,o.shape,o.dtype,i,p,!0);return e.makeTensorInfo(c,o.dtype,u)}var QE={kernelName:ya,backendName:"cpu",kernelFunc:I7};function v7(r){let{inputs:t,backend:e}=r,{data:o,indices:n,segmentIds:s}=t;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);if(n.shape[0]!==s.shape[0])throw new Error("segmentIds and indices should have same size.");let a=e.data.get(o.dataId).values,i=e.data.get(n.dataId).values,p=e.data.get(s.dataId).values,[u,c]=Cc(a,o.shape,o.dtype,i,p);return e.makeTensorInfo(c,o.dtype,u)}var ZE={kernelName:ba,backendName:"cpu",kernelFunc:v7};function k7(r){let{inputs:t,backend:e,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=t,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=w.calculateShapes(s,n,i),d=!1,f=e.bufferSync(n),h;switch(s.dtype){case"bool":{let g=e.bufferSync(s),x=!!e.data.get(a.dataId).values[0];h=zs(f,g,i,m,c,u,p,l,x,d);break}case"float32":{let g=e.bufferSync(s),x=e.data.get(a.dataId).values[0];h=zs(f,g,i,m,c,u,p,l,x,d);break}case"int32":{let g=e.bufferSync(s),x=e.data.get(a.dataId).values[0];h=zs(f,g,i,m,c,u,p,l,x,d);break}case"string":{let g=e.bufferSync(s),x=y.decodeString(e.data.get(a.dataId).values[0]);h=zs(f,g,i,m,c,u,p,l,x,d);break}default:throw new Error(`Unsupported type ${s.dtype}`)}return e.makeTensorInfo(i,h.dtype,h.values)}var JE={kernelName:vs,backendName:"cpu",kernelFunc:k7};function N7(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{numOrSizeSplits:s,axis:a}=o,i=y.parseAxisParam(a,n.shape)[0],p=w.prepareSplitSize(n,s,i),u=new Array(n.shape.length).fill(0),c=n.shape.slice();return p.map(l=>{let m=[...c];m[i]=l;let d=Ao({inputs:{x:n},backend:e,attrs:{begin:u,size:m}});return u[i]+=l,d})}var eR={kernelName:xa,backendName:"cpu",kernelFunc:N7};var tR={kernelName:Ki,backendName:"cpu",kernelFunc:({inputs:r,backend:t})=>{let{x:e}=r,o=t;Q(e,"square");let n=o.data.get(e.dataId).values,s=new Float32Array(n.length);for(let i=0;i<n.length;++i){let p=n[i];s[i]=p*p}return{dataId:o.write(s,e.shape,e.dtype),shape:e.shape,dtype:e.dtype}}};var T7=Ie(wo,(r,t)=>{let e=t;return isNaN(r)?NaN:r>0?1:e.alpha}),rR={kernelName:wo,backendName:"cpu",kernelFunc:T7};function 
_7(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o;Q(n,"stridedSlice");let{finalShapeSparse:d,finalShape:f,isIdentity:h,sliceDim0:g,isSimpleSlice:x,begin:b,end:C,strides:S}=pt.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=Ve({inputs:{x:n},backend:e,attrs:{shape:f}});else if(g||x){y.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let _=pt.computeOutShape(b,C,S),E=Ao({inputs:{x:n},backend:e,attrs:{begin:b,size:_}});k=Ve({inputs:{x:E},backend:e,attrs:{shape:f}}),e.disposeIntermediateTensorInfo(E)}else{let _=e.bufferSync(n),E=Pf(d,_,S,b);k=e.makeTensorInfo(f,E.dtype,E.values)}return k}var oR={kernelName:Ns,backendName:"cpu",kernelFunc:_7};function $7(r){let{inputs:t,backend:e,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=t,m=e.data.get(c.dataId).values,d=e.data.get(l.dataId).values,[f,h]=up(m,d,n,s,a,i,p,u);return[e.makeTensorInfo([f.length],"string",f),e.makeTensorInfo(l.shape,"int32",h)]}var nR={kernelName:Ca,backendName:"cpu",kernelFunc:$7};function E7(r){let{inputs:t,backend:e,attrs:o}=r,{skipEmpty:n}=o,{input:s,delimiter:a}=t;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(s.shape.length!==1)throw new Error(`Input must be a vector, got shape: ${s.shape}`);if(a.shape.length!==0)throw new Error(`Delimiter must be a scalar, got shape: ${a.shape}`);let i=e.data.get(s.dataId).values,p=e.data.get(a.dataId).values[0],[u,c,l]=pp(i,p,n),m=c.length;return[e.makeTensorInfo([m,2],"int32",u),e.makeTensorInfo([m],"string",c),e.makeTensorInfo([2],"int32",new Int32Array(l))]}var sR={kernelName:qi,backendName:"cpu",kernelFunc:E7};function R7(r){let{inputs:t,backend:e,attrs:o}=r,{numBuckets:n}=o,{input:s}=t;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(n<=0)throw new Error("Number of buckets must be at least 1");let 
a=e.data.get(s.dataId).values,i=cp(a,n);return e.makeTensorInfo(s.shape,"int32",i)}var aR={kernelName:ji,backendName:"cpu",kernelFunc:R7};var D7=Ie(_s,r=>Math.tan(r)),
2022-11-20 22:20:02 +01:00
`),s=n.length.toString().length+2,a=n.map((l,m)=>y.rightPad((m+1).toString(),s)+l),i=0;for(let l=0;l<a.length;l++)i=Math.max(a[l].length,i);let p=a.slice(0,o-1),u=a.slice(o-1,o),c=a.slice(o);console.log(p.join(`
2023-08-05 15:03:11 +02:00
`)),console.log(t.split(`
2022-11-20 22:20:02 +01:00
`)[0]),console.log(`%c ${y.rightPad(u[0],i)}`,"border:1px solid red; background-color:#e3d2d2; color:#a61717"),console.log(c.join(`
`))}function RI(r){return fi(r,()=>r.createProgram(),"Unable to create WebGLProgram.")}function DI(r,t){if(ce(r,()=>r.linkProgram(t)),!A().get("ENGINE_COMPILE_ONLY")&&r.getProgramParameter(t,r.LINK_STATUS)===!1)throw console.log(r.getProgramInfoLog(t)),new Error("Failed to link vertex and fragment shaders.")}function jl(r,t){if(ce(r,()=>r.validateProgram(t)),r.getProgramParameter(t,r.VALIDATE_STATUS)===!1)throw console.log(r.getProgramInfoLog(t)),new Error("Shader program validation failed.")}function AI(r,t){let e=fi(r,()=>r.createBuffer(),"Unable to create WebGLBuffer");return ce(r,()=>r.bindBuffer(r.ARRAY_BUFFER,e)),ce(r,()=>r.bufferData(r.ARRAY_BUFFER,t,r.STATIC_DRAW)),e}function FI(r,t){let e=fi(r,()=>r.createBuffer(),"Unable to create WebGLBuffer");return ce(r,()=>r.bindBuffer(r.ELEMENT_ARRAY_BUFFER,e)),ce(r,()=>r.bufferData(r.ELEMENT_ARRAY_BUFFER,t,r.STATIC_DRAW)),e}function eZ(){return A().getNumber("WEBGL_VERSION")===2?1:4}function PI(r){return fi(r,()=>r.createTexture(),"Unable to create WebGLTexture.")}function OI(r,t){let e=A().getNumber("WEBGL_MAX_TEXTURE_SIZE");if(r<=0||t<=0){let o=`[${r}x${t}]`;throw new Error("Requested texture size "+o+" is invalid.")}if(r>e||t>e){let o=`[${r}x${t}]`,n=`[${e}x${e}]`;throw new Error("Requested texture size "+o+" greater than WebGL maximum on this browser / GPU "+n+".")}}function MI(r){return fi(r,()=>r.createFramebuffer(),"Unable to create WebGLFramebuffer.")}function qf(r,t,e,o,n,s,a){let i=r.getAttribLocation(t,e);return i===-1?!1:(ce(r,()=>r.bindBuffer(r.ARRAY_BUFFER,o)),ce(r,()=>r.vertexAttribPointer(i,n,r.FLOAT,!1,s,a)),ce(r,()=>r.enableVertexAttribArray(i)),!0)}function CR(r,t,e){SR(r,e),ce(r,()=>r.activeTexture(r.TEXTURE0+e)),ce(r,()=>r.bindTexture(r.TEXTURE_2D,t))}function tZ(r,t){SR(r,t),ce(r,()=>r.activeTexture(r.TEXTURE0+t)),ce(r,()=>r.bindTexture(r.TEXTURE_2D,null))}function LI(r,t,e){return fi(r,()=>r.getUniformLocation(t,e),'uniform "'+e+'" not present in program.')}function BI(r,t,e){return 
r.getUniformLocation(t,e)}function zI(r,t,e,o){ce(r,()=>CR(r,t,o)),ce(r,()=>r.uniform1i(e,o))}function rZ(r){ce(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,null)),ce(r,()=>r.viewport(0,0,r.canvas.width,r.canvas.height)),ce(r,()=>r.scissor(0,0,r.canvas.width,r.canvas.height))}function Xl(r,t,e){ce(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,e)),ce(r,()=>r.framebufferTexture2D(r.FRAMEBUFFER,r.COLOR_ATTACHMENT0,r.TEXTURE_2D,t,0))}function jf(r,t){ce(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,t)),ce(r,()=>r.framebufferTexture2D(r.FRAMEBUFFER,r.COLOR_ATTACHMENT0,r.TEXTURE_2D,null,0))}function kc(r){let t=r.checkFramebufferStatus(r.FRAMEBUFFER);if(t!==r.FRAMEBUFFER_COMPLETE)throw new Error("Error binding framebuffer: "+wR(r,t))}function wR(r,t){switch(t){case r.FRAMEBUFFER_INCOMPLETE_ATTACHMENT:return"FRAMEBUFFER_INCOMPLETE_ATTACHMENT";case r.FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:return"FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT";case r.FRAMEBUFFER_INCOMPLETE_DIMENSIONS:return"FRAMEBUFFER_INCOMPLETE_DIMENSIONS";case r.FRAMEBUFFER_UNSUPPORTED:return"FRAMEBUFFER_UNSUPPORTED";default:return`unknown error ${t}`}}function fi(r,t,e){let o=ce(r,()=>t());if(o==null)throw new Error(e);return o}function SR(r,t){let e=r.MAX_COMBINED_TEXTURE_IMAGE_UNITS-1,o=t+r.TEXTURE0;if(o<r.TEXTURE0||o>e){let n=`[gl.TEXTURE0, gl.TEXTURE${e}]`;throw new Error(`textureUnit must be in ${n}.`)}}function hi(r,t=2){return y.sizeFromShape(r.slice(0,r.length-t))}function gi(r){if(r.length===0)throw Error("Cannot get rows and columns of an empty shape array.");return[r.length>1?r[r.length-2]:1,r[r.length-1]]}function Nc(r){let t=[1,1,1];return r.length===0||r.length===1&&r[0]===1||(t=[hi(r),...gi(r)]),t}function VI(r,t=!1){let 
e=A().getNumber("WEBGL_MAX_TEXTURE_SIZE"),o=A().getNumber("WEBGL_MAX_SIZE_FOR_NARROW_TEXTURE");o===1/0&&A().getBool("WEBGL_AUTO_SQUARIFY_NARROW_TEXTURE_SHAPE")&&(o=e/2),t&&(e=e*2,o=o*2,r=r.map((i,p)=>p>=r.length-2?y.nearestLargerEven(r[p]):r[p]),r.length===1&&(r=[2,r[0]])),r.length!==2&&(r=y.squeezeShape(r).newShape);let n=y.sizeFromShape(r),s=null;r.length<=1&&n<=e?s=[1,n]:r.length===2&&r[0]<=e&&r[1]
2022-11-18 17:13:29 +01:00
bool isnan_custom(float val) {
uint floatToUint = floatBitsToUint(val);
return (floatToUint & 0x7fffffffu) > 0x7f800000u;
}
bvec4 isnan_custom(vec4 val) {
return bvec4(isnan_custom(val.x),
isnan_custom(val.y), isnan_custom(val.z), isnan_custom(val.w));
}
#define isnan(value) isnan_custom(value)
`:"",p="",u=`
#define round(value) newRound(value)
int newRound(float value) {
return int(floor(value + 0.5));
}
ivec4 newRound(vec4 value) {
return ivec4(floor(value + vec4(0.5)));
}
2023-08-05 15:03:11 +02:00
`):(r="",t="attribute",e="varying",o="varying",n="texture2D",s="gl_FragColor",a="",i=`
2022-11-18 17:13:29 +01:00
#define isnan(value) isnan_custom(value)
bool isnan_custom(float val) {
return (val > 0. || val < 1. || val == 0.) ? false : true;
}
bvec4 isnan_custom(vec4 val) {
return bvec4(isnan(val.x), isnan(val.y), isnan(val.z), isnan(val.w));
}
`,p=`
uniform float INFINITY;
bool isinf(float val) {
return abs(val) == INFINITY;
}
bvec4 isinf(vec4 val) {
return equal(abs(val), vec4(INFINITY));
}
`,u=`
int round(float value) {
return int(floor(value + 0.5));
}
ivec4 round(vec4 value) {
return ivec4(floor(value + vec4(0.5)));
}
`),{version:r,attribute:t,varyingVs:e,varyingFs:o,texture2D:n,output:s,defineOutput:a,defineSpecialNaN:i,defineSpecialInf:p,defineRound:u}}function Ws(r,t,e="index"){let o=y.computeStrides(t);return o.map((n,s)=>{let a=`int ${r[s]} = ${e} / ${n}`,i=s===o.length-1?`int ${r[s+1]} = ${e} - ${r[s]} * ${n}`:`index -= ${r[s]} * ${n}`;return`${a}; ${i};`}).join("")}function hp(r,t,e="index"){let o=y.computeStrides(t);return o.map((n,s)=>{let a=`int ${r[s]} = ${e} / outShapeStrides[${s}]`,i=s===o.length-1?`int ${r[s+1]} = ${e} - ${r[s]} * outShapeStrides[${s}]`:`index -= ${r[s]} * outShapeStrides[${s}]`;return`${a}; ${i};`}).join("")}function aZ(r,t){let e=r.length,o=r.map(s=>`${t}[${s}]`),n=new Array(e-1);n[e-2]=o[e-1];for(let s=e-3;s>=0;--s)n[s]=`(${n[s+1]} * ${o[s+1]})`;return n}function IR(r,t,e="index"){let o=r.map((s,a)=>a),n=aZ(o,t);return n.map((s,a)=>{let i=`int ${r[a]} = ${e} / ${n[a]}`,p=a===n.length-1?`int ${r[a+1]} = ${e} - ${r[a]} * ${n[a]}`:`index -= ${r[a]} * ${n[a]}`;return`${i}; ${p};`}).join("")}function _c(r){let t=y.computeStrides(r).map(e=>e.toString());return`
2022-11-18 17:13:29 +01:00
int getFlatIndex(ivec3 coords) {
2023-08-05 15:03:11 +02:00
return coords.x * ${t[0]} + coords.y * ${t[1]} + coords.z;
2022-11-18 17:13:29 +01:00
}
`}function $c(){return`
2022-11-18 17:13:29 +01:00
int getFlatIndex(ivec3 coords) {
return coords.x * outShapeStrides[0] + coords.y * outShapeStrides[1] + coords.z;
}
`}var Yf=`
2022-11-18 17:13:29 +01:00
const float FLOAT_MAX = 1.70141184e38;
const float FLOAT_MIN = 1.17549435e-38;
lowp vec4 encode_float(highp float v) {
if (isnan(v)) {
return vec4(255, 255, 255, 255);
}
highp float av = abs(v);
if(av < FLOAT_MIN) {
return vec4(0.0, 0.0, 0.0, 0.0);
} else if(v > FLOAT_MAX) {
return vec4(0.0, 0.0, 128.0, 127.0) / 255.0;
} else if(v < -FLOAT_MAX) {
return vec4(0.0, 0.0, 128.0, 255.0) / 255.0;
}
highp vec4 c = vec4(0,0,0,0);
highp float e = floor(log2(av));
highp float m = exp2(fract(log2(av))) - 1.0;
c[2] = floor(128.0 * m);
m -= c[2] / 128.0;
c[1] = floor(32768.0 * m);
m -= c[1] / 32768.0;
c[0] = floor(8388608.0 * m);
highp float ebias = e + 127.0;
c[3] = floor(ebias / 2.0);
ebias -= c[3] * 2.0;
c[2] += floor(ebias) * 128.0;
c[3] += 128.0 * step(0.0, -v);
return c / 255.0;
}
`;var{getBroadcastDims:vR}=w;function kR(r,t,e){let o=[];if(r.forEach(d=>{let f=y.sizeFromShape(d.shapeInfo.logicalShape);if(d.shapeInfo.isUniform?o.push(`uniform float ${d.name}${f>1?`[${f}]`:""};`):(o.push(`uniform sampler2D ${d.name};`),o.push(`uniform int offset${d.name};`)),e.enableShapeUniforms){let{uniformShape:h}=Qf(e.packedInputs,d.shapeInfo.logicalShape,d.shapeInfo.texShape);switch(h.length){case 1:o.push(`uniform int ${d.name}Shape;`);break;case 2:o.push(`uniform ivec2 ${d.name}Shape;`);break;case 3:o.push(`uniform ivec3 ${d.name}Shape;`);break;case 4:o.push(`uniform ivec4 ${d.name}Shape;`);break;default:break}o.push(`uniform ivec2 ${d.name}TexShape;`)}}),e.enableShapeUniforms){switch(t.logicalShape.length){case 1:o.push("uniform int outShape;");break;case 2:o.push("uniform ivec2 outShape;"),o.push("uniform int outShapeStrides;");break;case 3:o.push("uniform ivec3 outShape;"),o.push("uniform ivec2 outShapeStrides;");break;case 4:o.push("uniform ivec4 outShape;"),o.push("uniform ivec3 outShapeStrides;");break;default:break}o.push("uniform ivec2 outTexShape;")}e.customUniforms&&e.customUniforms.forEach(d=>{o.push(`uniform ${d.type} ${d.name}${d.arrayIndex?`[${d.arrayIndex}]`:""};`)});let n=o.join(`
2023-08-05 15:03:11 +02:00
`),s=r.map(d=>iZ(d,t,e.packedInputs,e.enableShapeUniforms)).join(`
`),a=t.texShape,i=It(),p=cZ(i),u,c,l=dZ(i);return t.isPacked?(u=uZ(t.logicalShape,a,e.enableShapeUniforms),c=mZ(i)):(u=pZ(t.logicalShape,a,e.enableShapeUniforms),c=lZ(i)),e.packedInputs&&(l+=xZ),[l,p,c,n,u,s,e.userCode].join(`
`)}function Rc(r,t=!1){let e=r.shapeInfo.logicalShape;switch(e.length){case 0:return $Z(r,t);case 1:return RZ(r,t);case 2:return AZ(r,t);case 3:return PZ(r,t);case 4:return MZ(r,t);case 5:return LZ(r);case 6:return BZ(r);default:throw new Error(`${e.length}-D input sampling is not yet supported`)}}function NR(r,t){switch(r.shapeInfo.logicalShape.length){case 0:return _Z(r);case 1:return EZ(r,t);case 2:return DZ(r,t);case 3:return FZ(r,t);default:return OZ(r,t)}}function iZ(r,t,e=!1,o){let n="";e?n+=NR(r,o):n+=Rc(r,o);let s=r.shapeInfo.logicalShape,a=t.logicalShape;return s.length<=a.length&&(e?n+=zZ(r,t):n+=VZ(r,t)),n}function uZ(r,t,e){switch(r.length){case 0:return TR();case 1:return yZ(r,t,e);case 2:return NZ(r,t,e);case 3:return CZ(r,t,e);default:return SZ(r,t,e)}}function pZ(r,t,e){switch(r.length){case 0:return TR();case 1:return bZ(r,t,e);case 2:return TZ(r,t,e);case 3:return wZ(r,t,e);case 4:return IZ(r,t,e);case 5:return vZ(r,t);case 6:return kZ(r,t);default:throw new Error(`${r.length}-D output sampling is not yet supported`)}}function cZ(r){return`
2022-11-18 17:13:29 +01:00
float sampleTexture(sampler2D textureSampler, vec2 uv) {
return ${r.texture2D}(textureSampler, uv).r;
}
2023-08-05 15:03:11 +02:00
`}function lZ(r){return`
2022-11-18 17:13:29 +01:00
void setOutput(float val) {
${r.output} = vec4(val, 0, 0, 0);
}
2023-08-05 15:03:11 +02:00
`}function mZ(r){return`
2022-11-18 17:13:29 +01:00
void setOutput(vec4 val) {
${r.output} = val;
}
2023-08-05 15:03:11 +02:00
`}function dZ(r){return`${r.version}
2022-11-18 17:13:29 +01:00
precision highp float;
precision highp int;
precision highp sampler2D;
${r.varyingFs} vec2 resultUV;
${r.defineOutput}
const vec2 halfCR = vec2(0.5, 0.5);
struct ivec5
{
int x;
int y;
int z;
int w;
int u;
};
struct ivec6
{
int x;
int y;
int z;
int w;
int u;
int v;
};
uniform float NAN;
${r.defineSpecialNaN}
${r.defineSpecialInf}
${r.defineRound}
int imod(int x, int y) {
return x - y * (x / y);
}
int idiv(int a, int b, float sign) {
int res = a / b;
int mod = imod(a, b);
if (sign < 0. && mod != 0) {
res -= 1;
}
return res;
}
//Based on the work of Dave Hoskins
//https://www.shadertoy.com/view/4djSRW
#define HASHSCALE1 443.8975
float random(float seed){
vec2 p = resultUV * seed;
vec3 p3 = fract(vec3(p.xyx) * HASHSCALE1);
p3 += dot(p3, p3.yzx + 19.19);
return fract((p3.x + p3.y) * p3.z);
}
2023-08-05 15:03:11 +02:00
${fZ}
${hZ}
${gZ}
`}var fZ=`
2022-11-18 17:13:29 +01:00
vec2 uvFromFlat(int texNumR, int texNumC, int index) {
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 packedUVfrom1D(int texNumR, int texNumC, int index) {
int texelIndex = index / 2;
int texR = texelIndex / texNumC;
int texC = texelIndex - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
2023-08-05 15:03:11 +02:00
`,hZ=`
2022-11-18 17:13:29 +01:00
vec2 packedUVfrom2D(int texelsInLogicalRow, int texNumR,
int texNumC, int row, int col) {
int texelIndex = (row / 2) * texelsInLogicalRow + (col / 2);
int texR = texelIndex / texNumC;
int texC = texelIndex - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
2023-08-05 15:03:11 +02:00
`,gZ=`
2022-11-18 17:13:29 +01:00
vec2 packedUVfrom3D(int texNumR, int texNumC,
int texelsInBatch, int texelsInLogicalRow, int b,
int row, int col) {
int index = b * texelsInBatch + (row / 2) * texelsInLogicalRow + (col / 2);
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
2023-08-05 15:03:11 +02:00
`,xZ=`
2022-11-18 17:13:29 +01:00
float getChannel(vec4 frag, vec2 innerDims) {
vec2 modCoord = mod(innerDims, 2.);
return modCoord.x == 0. ?
(modCoord.y == 0. ? frag.r : frag.g) :
(modCoord.y == 0. ? frag.b : frag.a);
}
float getChannel(vec4 frag, int dim) {
float modCoord = mod(float(dim), 2.);
return modCoord == 0. ? frag.r : frag.g;
}
`;function TR(){return`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return 0;
}
2023-08-05 15:03:11 +02:00
`}function yZ(r,t,e){let o=[Math.ceil(t[0]/2),Math.ceil(t[1]/2)];return o[0]===1?e?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return 2 * int(resultUV.x * ceil(float(outTexShape[1]) / 2.0));
}
`:`
int getOutputCoords() {
return 2 * int(resultUV.x * ${o[1]}.0);
}
2023-08-05 15:03:11 +02:00
`:o[1]===1?e?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return 2 * int(resultUV.y * ceil(float(outTexShape[0]) / 2.0));
}
`:`
int getOutputCoords() {
return 2 * int(resultUV.y * ${o[0]}.0);
}
2023-08-05 15:03:11 +02:00
`:e?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
return 2 * (resTexRC.x * packedTexShape[1] + resTexRC.y);
}
`:`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
return 2 * (resTexRC.x * ${o[1]} + resTexRC.y);
}
2023-08-05 15:03:11 +02:00
`}function bZ(r,t,e){return t[0]===1?e?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return int(resultUV.x * float(outTexShape[1]));
}
`:`
int getOutputCoords() {
2023-08-05 15:03:11 +02:00
return int(resultUV.x * ${t[1]}.0);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`:t[1]===1?e?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return int(resultUV.y * float(outTexShape[0]));
}
`:`
int getOutputCoords() {
2023-08-05 15:03:11 +02:00
return int(resultUV.y * ${t[0]}.0);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`:e?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
return resTexRC.x * outTexShape[1] + resTexRC.y;
}
`:`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
2023-08-05 15:03:11 +02:00
vec2(${t[0]}, ${t[1]}));
return resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`}function CZ(r,t,e){if(e)return`
2022-11-18 17:13:29 +01:00
ivec3 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
int texelsInLogicalRow = int(ceil(float(outShape[2]) / 2.0));
int texelsInBatch = texelsInLogicalRow * int(ceil(float(outShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int b = index / texelsInBatch;
index -= b * texelsInBatch;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec3(b, r, c);
}
2023-08-05 15:03:11 +02:00
`;let o=[Math.ceil(t[0]/2),Math.ceil(t[1]/2)],n=Math.ceil(r[2]/2),s=n*Math.ceil(r[1]/2);return`
2022-11-18 17:13:29 +01:00
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
int b = index / ${s};
index -= b * ${s};
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec3(b, r, c);
}
2023-08-05 15:03:11 +02:00
`}function wZ(r,t,e){if(e)return`
2022-11-18 17:13:29 +01:00
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
${hp(["r","c","d"],r)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
2023-08-05 15:03:11 +02:00
`;let o=Ws(["r","c","d"],r);return`
2022-11-18 17:13:29 +01:00
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
2023-08-05 15:03:11 +02:00
vec2(${t[0]}, ${t[1]}));
int index = resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
${o}
return ivec3(r, c, d);
}
2023-08-05 15:03:11 +02:00
`}function SZ(r,t,e){if(e)return`
2022-11-18 17:13:29 +01:00
ivec4 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int texelsInLogicalRow = int(ceil(float(outShape[3]) / 2.0));
int texelsInBatch = texelsInLogicalRow * int(ceil(float(outShape[2]) / 2.0));
int texelsInBatchN = texelsInBatch * outShape[1];
int b2 = index / texelsInBatchN;
index -= b2 * texelsInBatchN;
int b = index / texelsInBatch;
index -= b * texelsInBatch;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec4(b2, b, r, c);
}
2023-08-05 15:03:11 +02:00
`;let o=[Math.ceil(t[0]/2),Math.ceil(t[1]/2)],n=Math.ceil(r[r.length-1]/2),s=n*Math.ceil(r[r.length-2]/2),a=s,i="",p="b, r, c";for(let u=2;u<r.length-1;u++)a*=r[r.length-u-1],i=`
2022-11-18 17:13:29 +01:00
int b${u} = index / ${a};
index -= b${u} * ${a};
`+i,p=`b${u}, `+p;return`
ivec${r.length} getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
${i}
int b = index / ${s};
index -= b * ${s};
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec${r.length}(${p});
}
2023-08-05 15:03:11 +02:00
`}function IZ(r,t,e){if(e)return`
2022-11-18 17:13:29 +01:00
ivec4 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
${hp(["r","c","d","d2"],r)}
2022-11-18 17:13:29 +01:00
return ivec4(r, c, d, d2);
}
2023-08-05 15:03:11 +02:00
`;let o=Ws(["r","c","d","d2"],r);return`
2022-11-18 17:13:29 +01:00
ivec4 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
2023-08-05 15:03:11 +02:00
vec2(${t[0]}, ${t[1]}));
int index = resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
${o}
return ivec4(r, c, d, d2);
}
2023-08-05 15:03:11 +02:00
`}function vZ(r,t){let e=Ws(["r","c","d","d2","d3"],r);return`
2022-11-18 17:13:29 +01:00
ivec5 getOutputCoords() {
2023-08-05 15:03:11 +02:00
ivec2 resTexRC = ivec2(resultUV.yx * vec2(${t[0]},
${t[1]}));
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
int index = resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
${e}
2022-11-18 17:13:29 +01:00
ivec5 outShape = ivec5(r, c, d, d2, d3);
return outShape;
}
2023-08-05 15:03:11 +02:00
`}function kZ(r,t){let e=Ws(["r","c","d","d2","d3","d4"],r);return`
2022-11-18 17:13:29 +01:00
ivec6 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
2023-08-05 15:03:11 +02:00
vec2(${t[0]}, ${t[1]}));
int index = resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
${e}
2022-11-18 17:13:29 +01:00
ivec6 result = ivec6(r, c, d, d2, d3, d4);
return result;
}
2023-08-05 15:03:11 +02:00
`}function NZ(r,t,e){let o=[Math.ceil(t[0]/2),Math.ceil(t[1]/2)];if(y.arraysEqual(r,t))return e?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
return 2 * ivec2(resultUV.yx * vec2(packedTexShape[0], packedTexShape[1]));
}
`:`
ivec2 getOutputCoords() {
return 2 * ivec2(resultUV.yx * vec2(${o[0]}, ${o[1]}));
}
2023-08-05 15:03:11 +02:00
`;let n=Math.ceil(r[1]/2);return e?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
int texelsInLogicalRow = int(ceil(float(outShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec2(r, c);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec2(r, c);
}
2023-08-05 15:03:11 +02:00
`}function TZ(r,t,e){return y.arraysEqual(r,t)?e?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
return ivec2(resultUV.yx * vec2(outTexShape[0], outTexShape[1]));
}
`:`
ivec2 getOutputCoords() {
2023-08-05 15:03:11 +02:00
return ivec2(resultUV.yx * vec2(${t[0]}, ${t[1]}));
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`:r[1]===1?e?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
return ivec2(index, 0);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
2023-08-05 15:03:11 +02:00
vec2(${t[0]}, ${t[1]}));
int index = resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
return ivec2(index, 0);
}
2023-08-05 15:03:11 +02:00
`:r[0]===1?e?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
return ivec2(0, index);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
2023-08-05 15:03:11 +02:00
vec2(${t[0]}, ${t[1]}));
int index = resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
return ivec2(0, index);
}
2023-08-05 15:03:11 +02:00
`:e?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
int r = index / outShape[1];
int c = index - r * outShape[1];
return ivec2(r, c);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
2023-08-05 15:03:11 +02:00
vec2(${t[0]}, ${t[1]}));
int index = resTexRC.x * ${t[1]} + resTexRC.y;
2022-11-18 17:13:29 +01:00
int r = index / ${r[1]};
int c = index - r * ${r[1]};
return ivec2(r, c);
}
`}function gp(r){return`offset${r}`}function _Z(r){let t=r.name,e="get"+t.charAt(0).toUpperCase()+t.slice(1),o=It();return`
2023-08-05 15:03:11 +02:00
vec4 ${e}() {
return ${o.texture2D}(${t}, halfCR);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`}function $Z(r,t){let e=r.name,o="get"+e.charAt(0).toUpperCase()+e.slice(1);if(r.shapeInfo.isUniform)return`float ${o}() {return ${e};}`;let[n,s]=r.shapeInfo.texShape;if(n===1&&s===1)return`
2022-11-18 17:13:29 +01:00
float ${o}() {
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, halfCR);
2022-11-18 17:13:29 +01:00
}
`;let a=gp(e);if(t)return`
2022-11-18 17:13:29 +01:00
float ${o}() {
2023-08-05 15:03:11 +02:00
vec2 uv = uvFromFlat(${e}TexShape[0], ${e}TexShape[1], ${a});
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`;let[i,p]=r.shapeInfo.texShape;return`
float ${o}() {
vec2 uv = uvFromFlat(${i}, ${p}, ${a});
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`}function EZ(r,t){let e=r.name,o="get"+e.charAt(0).toUpperCase()+e.slice(1),n=r.shapeInfo.texShape,s=It();if(t)return`
2022-11-18 17:13:29 +01:00
vec4 ${o}(int index) {
2023-08-05 15:03:11 +02:00
ivec2 packedTexShape = ivec2(ceil(float(${e}TexShape[0]) / 2.0), ceil(float(${e}TexShape[1]) / 2.0));
2022-11-18 17:13:29 +01:00
vec2 uv = packedUVfrom1D(
packedTexShape[0], packedTexShape[1], index);
2023-08-05 15:03:11 +02:00
return ${s.texture2D}(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`;let a=[Math.ceil(n[0]/2),Math.ceil(n[1]/2)];return`
vec4 ${o}(int index) {
vec2 uv = packedUVfrom1D(
${a[0]}, ${a[1]}, index);
2023-08-05 15:03:11 +02:00
return ${s.texture2D}(${e}, uv);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`}function RZ(r,t){let e=r.name,o="get"+e.charAt(0).toUpperCase()+e.slice(1);if(r.shapeInfo.isUniform)return`
2022-11-18 17:13:29 +01:00
float ${o}(int index) {
${Dc(r)}
2022-11-18 17:13:29 +01:00
}
`;let n=r.shapeInfo.texShape,s=n[0],a=n[1];if(a===1&&s===1)return`
float ${o}(int index) {
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, halfCR);
2022-11-18 17:13:29 +01:00
}
`;let i=gp(e);return a===1?t?`
2022-11-18 17:13:29 +01:00
float ${o}(int index) {
2023-08-05 15:03:11 +02:00
vec2 uv = vec2(0.5, (float(index + ${i}) + 0.5) / float(${e}TexShape[0]));
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`:`
float ${o}(int index) {
vec2 uv = vec2(0.5, (float(index + ${i}) + 0.5) / ${s}.0);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`:s===1?t?`
2022-11-18 17:13:29 +01:00
float ${o}(int index) {
2023-08-05 15:03:11 +02:00
vec2 uv = vec2((float(index + ${i}) + 0.5) / float(${e}TexShape[1]), 0.5);
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`:`
float ${o}(int index) {
vec2 uv = vec2((float(index + ${i}) + 0.5) / ${a}.0, 0.5);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`:t?`
2022-11-18 17:13:29 +01:00
float ${o}(int index) {
2023-08-05 15:03:11 +02:00
vec2 uv = uvFromFlat(${e}TexShape[0], ${e}TexShape[1], index + ${i});
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`:`
float ${o}(int index) {
vec2 uv = uvFromFlat(${s}, ${a}, index + ${i});
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`}function DZ(r,t){let e=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape,a=s[0],i=s[1],p=It();if(s!=null&&y.arraysEqual(e,s))return t?`
2022-11-18 17:13:29 +01:00
vec4 ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return ${p.texture2D}(${o}, uv);
}
`:`
vec4 ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${i}.0, ${a}.0);
return ${p.texture2D}(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`;if(t)return`
2022-11-18 17:13:29 +01:00
vec4 ${n}(int row, int col) {
ivec2 packedTexShape = ivec2(ceil(float(${o}TexShape[0]) / 2.0), ceil(float(${o}TexShape[1]) / 2.0));
int valuesPerRow = int(ceil(float(${o}Shape[1]) / 2.0));
vec2 uv = packedUVfrom2D(valuesPerRow, packedTexShape[0], packedTexShape[1], row, col);
return ${p.texture2D}(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`;let u=[Math.ceil(s[0]/2),Math.ceil(s[1]/2)],c=Math.ceil(e[1]/2);return`
2022-11-18 17:13:29 +01:00
vec4 ${n}(int row, int col) {
vec2 uv = packedUVfrom2D(${c}, ${u[0]}, ${u[1]}, row, col);
return ${p.texture2D}(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`}function AZ(r,t){let e=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape;if(s!=null&&y.arraysEqual(e,s)){if(t)return`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
2022-11-20 22:20:02 +01:00
`;let m=s[0],d=s[1];return`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
2022-11-20 22:20:02 +01:00
vec2 uv = (vec2(col, row) + halfCR) / vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
`}let{newShape:a,keptDims:i}=y.squeezeShape(e),p=a;if(p.length<e.length){let m=Ac(r,p),d=["row","col"];return`
${Rc(m,t)}
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
return ${n}(${Fc(d,i)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col) {
2023-08-05 15:03:11 +02:00
int index = round(dot(vec2(row, col), vec2(${e[1]}, 1)));
${Dc(r)}
2022-11-18 17:13:29 +01:00
}
`;let u=s[0],c=s[1],l=gp(o);return c===1?t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${o}Shape[1], 1, 1));
vec2 uv = vec2(0.5, (index + 0.5) / float(${o}TexShape[0]));
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
2023-08-05 15:03:11 +02:00
float index = dot(vec3(row, col, ${l}), vec3(${e[1]}, 1, 1));
2022-11-18 17:13:29 +01:00
vec2 uv = vec2(0.5, (index + 0.5) / ${u}.0);
return sampleTexture(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`:u===1?t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${o}Shape[1], 1, 1));
vec2 uv = vec2((index + 0.5) / float(${o}TexShape[1]), 0.5);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
2023-08-05 15:03:11 +02:00
float index = dot(vec3(row, col, ${l}), vec3(${e[1]}, 1, 1));
2022-11-18 17:13:29 +01:00
vec2 uv = vec2((index + 0.5) / ${c}.0, 0.5);
return sampleTexture(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`:t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${o}Shape[1] + col + ${l};
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
// Explicitly use integer operations as dot() only works on floats.
2023-08-05 15:03:11 +02:00
int index = row * ${e[1]} + col + ${l};
2022-11-18 17:13:29 +01:00
vec2 uv = uvFromFlat(${u}, ${c}, index);
return sampleTexture(${o}, uv);
}
`}function FZ(r,t){let e=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape,a=[Math.ceil(s[0]/2),Math.ceil(s[1]/2)];if(e[0]===1){let m=e.slice(1),d=[1,2],f=Ac(r,m),h=["b","row","col"];return`
${NR(f,t)}
2022-11-18 17:13:29 +01:00
vec4 ${n}(int b, int row, int col) {
return ${n}(${Fc(h,d)});
2022-11-18 17:13:29 +01:00
}
`}let i=It();if(t)return`
2022-11-18 17:13:29 +01:00
vec4 ${n}(int b, int row, int col) {
ivec2 packedTexShape = ivec2(ceil(float(${o}TexShape[0]) / 2.0), ceil(float(${o}TexShape[1]) / 2.0));
int valuesPerRow = int(ceil(float(${o}Shape[2]) / 2.0));
int texelsInBatch = valuesPerRow * int(ceil(float(${o}Shape[1]) / 2.0));
vec2 uv = packedUVfrom3D(
packedTexShape[0], packedTexShape[1], texelsInBatch, valuesPerRow, b, row, col);
return ${i.texture2D}(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`;let p=a[0],u=a[1],c=Math.ceil(e[2]/2),l=c*Math.ceil(e[1]/2);return`
2022-11-18 17:13:29 +01:00
vec4 ${n}(int b, int row, int col) {
vec2 uv = packedUVfrom3D(
${p}, ${u}, ${l}, ${c}, b, row, col);
return ${i.texture2D}(${o}, uv);
}
`}function PZ(r,t){let e=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=e[1]*e[2],a=e[2],{newShape:i,keptDims:p}=y.squeezeShape(e),u=i;if(u.length<e.length){let h=Ac(r,u),g=["row","col","depth"];return`
${Rc(h,t)}
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
return ${n}(${Fc(g,p)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col, int depth) {
int index = round(dot(vec3(row, col, depth),
vec3(${s}, ${a}, 1)));
${Dc(r)}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`;let c=r.shapeInfo.texShape,l=c[0],m=c[1],d=r.shapeInfo.flatOffset;if(m===s&&d==null)return t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
int stride1 = ${o}Shape[2];
float texR = float(row);
float texC = dot(vec2(col, depth), vec2(stride1, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
float texR = float(row);
float texC = dot(vec2(col, depth), vec2(${a}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${m}.0, ${l}.0);
return sampleTexture(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`;if(m===a&&d==null)return t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
float texR = dot(vec2(row, col), vec2(${o}Shape[1], 1));
float texC = float(depth);
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
2023-08-05 15:03:11 +02:00
float texR = dot(vec2(row, col), vec2(${e[1]}, 1));
2022-11-18 17:13:29 +01:00
float texC = float(depth);
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${m}.0, ${l}.0);
return sampleTexture(${o}, uv);
}
`;let f=gp(o);return t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
// Explicitly use integer operations as dot() only works on floats.
int stride0 = ${o}Shape[1] * ${o}Shape[2];
int stride1 = ${o}Shape[2];
2022-11-20 22:20:02 +01:00
int index = row * stride0 + col * stride1 + depth + ${f};
2022-11-18 17:13:29 +01:00
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
// Explicitly use integer operations as dot() only works on floats.
2022-11-20 22:20:02 +01:00
int index = row * ${s} + col * ${a} + depth + ${f};
2022-11-18 17:13:29 +01:00
vec2 uv = uvFromFlat(${l}, ${m}, index);
return sampleTexture(${o}, uv);
}
`}function OZ(r,t){let e=r.name,o="get"+e.charAt(0).toUpperCase()+e.slice(1),n=It();if(t)return`
2022-11-18 17:13:29 +01:00
vec4 ${o}(int b2, int b, int row, int col) {
2023-08-05 15:03:11 +02:00
int valuesPerRow = int(ceil(float(${e}Shape[3]) / 2.0));
int texelsInBatch = valuesPerRow * int(ceil(float(${e}Shape[2]) / 2.0));
2022-11-18 17:13:29 +01:00
int index = b * texelsInBatch + (row / 2) * valuesPerRow + (col / 2);
2023-08-05 15:03:11 +02:00
texelsInBatch *= ${e}Shape[1];
2022-11-18 17:13:29 +01:00
index = b2 * texelsInBatch + index;
2023-08-05 15:03:11 +02:00
ivec2 packedTexShape = ivec2(ceil(float(${e}TexShape[0]) / 2.0), ceil(float(${e}TexShape[1]) / 2.0));
2022-11-18 17:13:29 +01:00
int texR = index / packedTexShape[1];
int texC = index - texR * packedTexShape[1];
2023-08-05 15:03:11 +02:00
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(packedTexShape[1], packedTexShape[0]); return ${n.texture2D}(${e}, uv);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let s=r.shapeInfo.logicalShape,a=s.length,i=r.shapeInfo.texShape,p=[Math.ceil(i[0]/2),Math.ceil(i[1]/2)],u=p[0],c=p[1],l=Math.ceil(s[a-1]/2),m=l*Math.ceil(s[a-2]/2),d="int b, int row, int col",f=`b * ${m} + (row / 2) * ${l} + (col / 2)`;for(let h=2;h<a-1;h++)d=`int b${h}, `+d,m*=s[a-h-1],f=`b${h} * ${m} + `+f;return`
vec4 ${o}(${d}) {
int index = ${f};
2022-11-18 17:13:29 +01:00
int texR = index / ${c};
int texC = index - texR * ${c};
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${c}, ${u});
2023-08-05 15:03:11 +02:00
return ${n.texture2D}(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`}function MZ(r,t){let e=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=e[3],a=e[2]*s,i=e[1]*a,{newShape:p,keptDims:u}=y.squeezeShape(e);if(p.length<e.length){let b=Ac(r,p),C=["row","col","depth","depth2"];return`
${Rc(b,t)}
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
return ${n}(${Fc(C,u)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col, int depth, int depth2) {
int index = round(dot(vec4(row, col, depth, depth2),
vec4(${i}, ${a}, ${s}, 1)));
${Dc(r)}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`;let c=r.shapeInfo.flatOffset,l=r.shapeInfo.texShape,m=l[0],d=l[1],f=`int stride2 = ${o}Shape[3];`,h=`int stride1 = ${o}Shape[2] * stride2;`,g=`int stride0 = ${o}Shape[1] * stride1;`;if(d===i&&c==null)return t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
${h}
float texR = float(row);
float texC =
dot(vec3(col, depth, depth2),
vec3(stride1, stride2, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
float texR = float(row);
float texC =
dot(vec3(col, depth, depth2),
vec3(${a}, ${s}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
2023-08-05 15:03:11 +02:00
`;if(d===s&&c==null)return t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
float texR = dot(vec3(row, col, depth),
vec3(${o}Shape[1] * ${o}Shape[2], ${o}Shape[2], 1));
float texC = float(depth2);
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
float texR = dot(vec3(row, col, depth),
2023-08-05 15:03:11 +02:00
vec3(${e[1]*e[2]}, ${e[2]}, 1));
2022-11-18 17:13:29 +01:00
float texC = float(depth2);
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
`;let x=gp(o);return t?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
// Explicitly use integer operations as dot() only works on floats.
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
${h}
${g}
int index = row * stride0 + col * stride1 +
depth * stride2 + depth2;
2022-11-20 22:20:02 +01:00
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index + ${x});
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${i} + col * ${a} +
depth * ${s} + depth2;
2022-11-20 22:20:02 +01:00
vec2 uv = uvFromFlat(${m}, ${d}, index + ${x});
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
`}function LZ(r){let t=r.shapeInfo.logicalShape,e=r.name,o="get"+e.charAt(0).toUpperCase()+e.slice(1),n=t[4],s=t[3]*n,a=t[2]*s,i=t[1]*a,{newShape:p,keptDims:u}=y.squeezeShape(t);if(p.length<t.length){let h=Ac(r,p),g=["row","col","depth","depth2","depth3"];return`
${Rc(h)}
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
return ${o}(${Fc(g,u)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${o}(int row, int col, int depth, int depth2, int depth3) {
float index = dot(
vec4(row, col, depth, depth2),
vec4(${i}, ${a}, ${s}, ${n})) +
depth3;
${Dc(r)}
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let c=r.shapeInfo.flatOffset,l=r.shapeInfo.texShape,m=l[0],d=l[1];if(d===i&&c==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
int texR = row;
float texC = dot(vec4(col, depth, depth2, depth3),
vec4(${a}, ${s}, ${n}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;if(d===n&&c==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
float texR = dot(
vec4(row, col, depth, depth2),
2023-08-05 15:03:11 +02:00
vec4(${t[1]*t[2]*t[3]},
${t[2]*t[3]}, ${t[3]}, 1));
2022-11-18 17:13:29 +01:00
int texC = depth3;
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`;let f=gp(e);return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${i} + col * ${a} + depth * ${s} +
2022-11-20 22:20:02 +01:00
depth2 * ${n} + depth3 + ${f};
vec2 uv = uvFromFlat(${m}, ${d}, index);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`}function BZ(r){let t=r.shapeInfo.logicalShape,e=r.name,o="get"+e.charAt(0).toUpperCase()+e.slice(1),{newShape:n,keptDims:s}=y.squeezeShape(t);if(n.length<t.length){let g=Ac(r,n),x=["row","col","depth","depth2","depth3","depth4"];return`
${Rc(g)}
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
return ${o}(${Fc(x,s)});
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`}let a=t[5],i=t[4]*a,p=t[3]*i,u=t[2]*p,c=t[1]*u;if(r.shapeInfo.isUniform)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
int index = round(dot(
vec4(row, col, depth, depth2),
vec4(${c}, ${u}, ${p}, ${i})) +
dot(
vec2(depth3, depth4),
vec2(${a}, 1)));
${Dc(r)}
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let l=r.shapeInfo.flatOffset,m=r.shapeInfo.texShape,d=m[0],f=m[1];if(f===c&&l==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
int texR = row;
float texC = dot(vec4(col, depth, depth2, depth3),
vec4(${u}, ${p}, ${i}, ${a})) +
float(depth4);
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${f}.0, ${d}.0);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;if(f===a&&l==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
float texR = dot(vec4(row, col, depth, depth2),
2023-08-05 15:03:11 +02:00
vec4(${t[1]*t[2]*t[3]*t[4]},
${t[2]*t[3]*t[4]},
${t[3]*t[4]},
${t[4]})) + float(depth3);
2022-11-18 17:13:29 +01:00
int texC = depth4;
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${f}.0, ${d}.0);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`;let h=gp(e);return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${c} + col * ${u} + depth * ${p} +
depth2 * ${i} + depth3 * ${a} + depth4 + ${h};
2022-11-20 22:20:02 +01:00
vec2 uv = uvFromFlat(${d}, ${f}, index);
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, uv);
2022-11-18 17:13:29 +01:00
}
`}function Dc(r){let t=r.name,e=y.sizeFromShape(r.shapeInfo.logicalShape);return e<2?`return ${t};`:`
2023-08-05 15:03:11 +02:00
for (int i = 0; i < ${e}; i++) {
2022-11-18 17:13:29 +01:00
if (i == index) {
2023-08-05 15:03:11 +02:00
return ${t}[i];
2022-11-18 17:13:29 +01:00
}
}
`}function zZ(r,t){let e=r.name,o=e.charAt(0).toUpperCase()+e.slice(1),n="get"+o+"AtOutCoords",s=r.shapeInfo.logicalShape.length,a=t.logicalShape.length,i=vR(r.shapeInfo.logicalShape,t.logicalShape),p=Re(a),u=a-s,c,l=["x","y","z","w","u","v"];s===0?c="":a<2&&i.length>=1?c="coords = 0;":c=i.map(b=>`coords.${l[b+u]} = 0;`).join(`
2023-08-05 15:03:11 +02:00
`);let m="";a<2&&s>0?m="coords":m=r.shapeInfo.logicalShape.map((b,C)=>`coords.${l[C+u]}`).join(", ");let d="return outputValue;",h=y.sizeFromShape(r.shapeInfo.logicalShape)===1,x=y.sizeFromShape(t.logicalShape)===1;if(s===1&&!h&&!x)d=`
2022-11-18 17:13:29 +01:00
return vec4(outputValue.xy, outputValue.xy);
2022-11-20 22:20:02 +01:00
`;else if(h&&!x)a===1?d=`
2022-11-18 17:13:29 +01:00
return vec4(outputValue.x, outputValue.x, 0., 0.);
2022-11-20 22:20:02 +01:00
`:d=`
2022-11-18 17:13:29 +01:00
return vec4(outputValue.x);
`;else if(i.length){let b=s-2,C=s-1;i.indexOf(b)>-1&&i.indexOf(C)>-1?d="return vec4(outputValue.x);":i.indexOf(b)>-1?d="return vec4(outputValue.x, outputValue.y, outputValue.x, outputValue.y);":i.indexOf(C)>-1&&(d="return vec4(outputValue.xx, outputValue.zz);")}return`
2022-11-18 17:13:29 +01:00
vec4 ${n}() {
${p} coords = getOutputCoords();
${c}
vec4 outputValue = get${o}(${m});
2022-11-20 22:20:02 +01:00
${d}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`}function VZ(r,t){let e=r.name,o=e.charAt(0).toUpperCase()+e.slice(1),n="get"+o+"AtOutCoords",s=t.texShape,a=r.shapeInfo.texShape,i=r.shapeInfo.logicalShape.length,p=t.logicalShape.length;if(!r.shapeInfo.isUniform&&i===p&&r.shapeInfo.flatOffset==null&&y.arraysEqual(a,s))return`
2022-11-18 17:13:29 +01:00
float ${n}() {
2023-08-05 15:03:11 +02:00
return sampleTexture(${e}, resultUV);
2022-11-18 17:13:29 +01:00
}
`;let u=Re(p),c=vR(r.shapeInfo.logicalShape,t.logicalShape),l=p-i,m,d=["x","y","z","w","u","v"];i===0?m="":p<2&&c.length>=1?m="coords = 0;":m=c.map(h=>`coords.${d[h+l]} = 0;`).join(`
2022-11-20 22:20:02 +01:00
`);let f="";return p<2&&i>0?f="coords":f=r.shapeInfo.logicalShape.map((h,g)=>`coords.${d[g+l]}`).join(", "),`
2022-11-18 17:13:29 +01:00
float ${n}() {
${u} coords = getOutputCoords();
${m}
2022-11-20 22:20:02 +01:00
return get${o}(${f});
2022-11-18 17:13:29 +01:00
}
`}function Re(r){if(r<=1)return"int";if(r===2)return"ivec2";if(r===3)return"ivec3";if(r===4)return"ivec4";if(r===5)return"ivec5";if(r===6)return"ivec6";throw Error(`GPU for rank ${r} is not yet supported`)}function Qf(r,t,e){let{newShape:o,keptDims:n}=y.squeezeShape(t),s=t.length,a=r&&s===3&&t[0]===1,i=a?t.slice(1):o,p=!r&&s>1&&!y.arraysEqual(t,e)&&o.length<s||a;return{useSqueezeShape:p,uniformShape:p?i:t,keptDims:n}}function Ac(r,t){let e=JSON.parse(JSON.stringify(r));return e.shapeInfo.logicalShape=t,e}function Fc(r,t){return t.map(e=>r[e]).join(", ")}function $R(r,t,e,o){let n=e.map((c,l)=>{let m={logicalShape:c.shape,texShape:c.isUniform?null:c.texData.texShape,isUniform:c.isUniform,isPacked:c.isUniform?!1:c.texData.isPacked,flatOffset:null};return c.texData!=null&&c.texData.slice!=null&&c.texData.slice.flatOffset>0&&(m.flatOffset=c.texData.slice.flatOffset),{name:t.variableNames[l],shapeInfo:m}}),s=n.map(c=>c.shapeInfo),a={logicalShape:o.shape,texShape:o.texData.texShape,isUniform:!1,isPacked:o.texData.isPacked,flatOffset:null},i=kR(n,a,t),p=EI(r.gl,i),u=r.createProgram(p);return A().get("ENGINE_COMPILE_ONLY")?{program:t,fragmentShader:p,source:i,webGLProgram:u,inShapeInfos:s,outShapeInfo:a,variablesLocations:null,customUniformLocations:null,infLoc:null,nanLoc:null,outShapeLocation:null,outShapeStridesLocation:null,outTexShapeLocation:null}:(r.buildVao(u),Object.assign({program:t,fragmentShader:p,source:i,webGLProgram:u,inShapeInfos:s,outShapeInfo:a},jI(r,t,u)))}function jI(r,t,e){let o=[],n=[],s,a,i,p=null,u=null;u=r.getUniformLocation(e,"NAN",!1),A().getNumber("WEBGL_VERSION")===1&&(p=r.getUniformLocation(e,"INFINITY",!1));let c=!1;for(let l of t.variableNames){let 
m={name:l,uniform:r.getUniformLocation(e,l,c),offset:r.getUniformLocation(e,`offset${l}`,c)};t.enableShapeUniforms&&(m.shape=r.getUniformLocation(e,`${l}Shape`,c),m.texShape=r.getUniformLocation(e,`${l}TexShape`,c)),o.push(m)}if(t.enableShapeUniforms&&(s=r.getUniformLocation(e,"outShape",c),i=r.getUniformLocation(e,"outShapeStrides",c),a=r.getUniformLocation(e,"outTexShape",c)),t.customUniforms)for(let l of t.customUniforms)n.push(r.getUniformLocation(e,l.name,c));return{variablesLocations:o,customUniformLocations:n,infLoc:p,nanLoc:u,outShapeLocation:s,outShapeStridesLocation:i,outTexShapeLocation:a}}function _R(r,t){if(r.length!==t.length)throw Error(`Binary was compiled with ${r.length} inputs, but was executed with ${t.length} inputs`);r.forEach((e,o)=>{let n=e.logicalShape,s=t[o],a=s.shape;if(!y.arraysEqual(n,a))throw Error(`Binary was compiled with different shapes than the current args. Shapes ${n} and ${a} must match`);if(e.isUniform&&s.isUniform)return;let i=e.texShape,p=s.isUniform?null:s.texData.texShape;if(!y.arraysEqual(i,p))throw Error(`Binary was compiled with different texture shapes than the current args. 
Shape ${i} and ${p} must match`)})}function ER(r,t,e,o,n){t.program.enableShapeUniforms||(_R(t.inShapeInfos,e),_R([t.outShapeInfo],[o]));let s=o.texData.texture,a=o.texData.texShape;o.texData.isPacked?r.setOutputPackedMatrixTexture(s.texture,a[0],a[1]):r.setOutputMatrixTexture(s.texture,a[0],a[1]),r.setProgram(t.webGLProgram),r.bindVertexArray(t.webGLProgram.vao),A().getNumber("WEBGL_VERSION")===1&&t.infLoc!==null&&r.gl.uniform1f(t.infLoc,1/0),t.nanLoc!==null&&r.gl.uniform1f(t.nanLoc,NaN);for(let p=0;p<e.length;++p){let u=e[p],{uniform:c,offset:l,shape:m,texShape:d}=t.variablesLocations[p];if(m){let{uniformShape:f}=Qf(t.program.packedInputs,u.shape,u.texData.texShape);switch(f.length){case 1:r.gl.uniform1iv(m,new Int32Array(f));break;case 2:r.gl.uniform2iv(m,new Int32Array(f));break;case 3:r.gl.uniform3iv(m,new Int32Array(f));break;case 4:r.gl.uniform4iv(m,new Int32Array(f));break;default:break}}if(d&&r.gl.uniform2i(d,u.texData.texShape[0],u.texData.texShape[1]),c!=null){if(u.isUniform){if(y.sizeFromShape(u.shape)<2)r.gl.uniform1f(c,u.uniformValues[0]);else{let f=u.uniformValues;f instanceof Float32Array||(f=new Float32Array(f)),r.gl.uniform1fv(c,f)}continue}u.texData.slice!=null&&l!=null&&r.gl.uniform1i(l,u.texData.slice.flatOffset)
2022-11-18 17:13:29 +01:00
ivec3 outCoordsFromFlatIndex(int index) {
${this.enableShapeUniforms?hp(["r","c","d"],t):Ws(["r","c","d"],t)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
void main() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(texShape[0], texShape[1]));
int index = 4 * (resTexRC.x * texShape[1] + resTexRC.y);
vec4 result = vec4(0.);
for (int i=0; i<4; i++) {
int flatIndex = index + i;
ivec3 rc = outCoordsFromFlatIndex(flatIndex);
result[i] = getA(rc.x, rc.y, rc.z);
}
2023-08-05 15:03:11 +02:00
${e.output} = result;
2022-11-18 17:13:29 +01:00
}
`}};var Jf=class{constructor(t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outPackingScheme=fu.DENSE,this.customUniforms=[{name:"texShape",type:"ivec2"}];let e=It();this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
ivec3 outCoordsFromFlatIndex(int index) {
${this.enableShapeUniforms?hp(["r","c","d"],t):Ws(["r","c","d"],t)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
void main() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(texShape[0], texShape[1]));
int index = 4 * (resTexRC.x * texShape[1] + resTexRC.y);
vec4 result = vec4(0.);
for (int i=0; i<4; i++) {
int flatIndex = index + i;
ivec3 rc = outCoordsFromFlatIndex(flatIndex);
result[i] = getChannel(getA(rc.x, rc.y, rc.z), vec2(rc.y, rc.z));
}
2023-08-05 15:03:11 +02:00
${e.output} = result;
2022-11-18 17:13:29 +01:00
}
`}};var eh=class{constructor(t){this.variableNames=["A"],this.outTexUsage=mr.DOWNLOAD;let e=It();this.outputShape=t,this.userCode=`
${Yf}
2022-11-18 17:13:29 +01:00
void main() {
float x = getAAtOutCoords();
2023-08-05 15:03:11 +02:00
${e.output} = encode_float(x);
2022-11-18 17:13:29 +01:00
}
`}};var th=class{constructor(t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!1,this.outTexUsage=mr.DOWNLOAD;let e=It();this.outputShape=t,this.userCode=`
${Yf}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
float x = getChannel(getAAtOutCoords(), vec2(coords.y, coords.z));
2023-08-05 15:03:11 +02:00
${e.output} = encode_float(x);
2022-11-18 17:13:29 +01:00
}
`}};var GZ={R:0,G:1,B:2,A:3},Yl=class{constructor(t,e=!1,o="RGBA"){this.variableNames=["A"],this.customUniforms=[{name:"texShape",type:"ivec2"}];let n=It();this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length);let s="result";e&&(s="floor(result * 255. + 0.5)");let a="";for(let i=0;i<o.length;i++){let p=o[i];a+=`
2022-11-18 17:13:29 +01:00
if(offset == ${i}) {
2023-08-05 15:03:11 +02:00
result = values[${GZ[p]}];
2022-11-18 17:13:29 +01:00
}`}this.userCode=`
${this.enableShapeUniforms?$c():_c(t)}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
int flatIndex = getFlatIndex(coords);
float result = 0.;
int offset = imod(flatIndex, ${o.length});
flatIndex = idiv(flatIndex, ${o.length}, 1.);
int r = flatIndex / texShape[1];
if (r < texShape[0]) {
int c = imod(flatIndex, texShape[1]);
vec2 uv = (vec2(c, r) + halfCR) / vec2(texShape[1], texShape[0]);
vec4 values = ${n.texture2D}(A, uv);
${a}
}
${n.output} = vec4(${s}, 0., 0., 0.);
}
`}};var rh=class{constructor(t,e=!1){this.variableNames=["A"],this.packedInputs=!1,this.packedOutput=!0,this.customUniforms=[{name:"texShape",type:"ivec2"}];let o=It();this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length);let n="",s="result";e&&(s="floor(result * 255. + 0.5)");for(let a=0;a<=1;a++)for(let i=0;i<=1;i++){let p=a*2+i;n+=`
2022-11-18 17:13:29 +01:00
localCoords = coords;
2023-08-05 15:03:11 +02:00
if(localCoords[2] + ${i} < ${this.enableShapeUniforms?"outShape[2]":`${t[2]}`}) {
2022-11-18 17:13:29 +01:00
localCoords[2] += ${i};
2023-08-05 15:03:11 +02:00
if (localCoords[1] + ${a} < ${this.enableShapeUniforms?"outShape[1]":`${t[1]}`}) {
2022-11-18 17:13:29 +01:00
localCoords[1] += ${a};
flatIndex = getFlatIndex(localCoords);
offset = imod(flatIndex, 4);
flatIndex = idiv(flatIndex, 4, 1.);
int r = flatIndex / texShape[1];
int c = imod(flatIndex, texShape[1]);
vec2 uv = (vec2(c, r) + halfCR) / vec2(texShape[1], texShape[0]);
values = ${o.texture2D}(A, uv);
if (offset == 0) {
result[${p}] = values[0];
} else if (offset == 1) {
result[${p}] = values[1];
} else if (offset == 2) {
result[${p}] = values[2];
} else {
result[${p}] = values[3];
}
}
}
`}this.userCode=`
${this.enableShapeUniforms?$c():_c(t)}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
vec4 result = vec4(0.);
int flatIndex, r, c, offset;
ivec3 localCoords;
vec2 uv;
vec4 values;
${n}
${o.output} = ${s};
}
`}};var lv={};qe(lv,{bindVertexProgramAttributeStreams:()=>ov,createBufferFromOutputTexture:()=>av,createFloat16MatrixTexture:()=>JI,createFloat16PackedMatrixTexture:()=>rv,createFloat32MatrixTexture:()=>ZI,createIndexBuffer:()=>QI,createPackedMatrixTexture:()=>tv,createUnsignedBytesMatrixTexture:()=>ev,createVertexBuffer:()=>YI,createVertexShader:()=>XI,downloadByteEncodedFloatMatrixFromOutputTexture:()=>uv,downloadFloat32MatrixFromBuffer:()=>iv,downloadMatrixFromPackedOutputTexture:()=>cv,downloadPackedMatrixFromBuffer:()=>pv,getInternalFormatForFloat16MatrixTexture:()=>nh,getInternalFormatForFloat16PackedMatrixTexture:()=>ih,getInternalFormatForFloat32MatrixTexture:()=>oh,getInternalFormatForPackedMatrixTexture:()=>ah,getInternalFormatForUnsignedBytesMatrixTexture:()=>sh,uploadDenseMatrixToTexture:()=>nv,uploadPixelDataToTexture:()=>sv});function XI(r){let t=It(),e=`${t.version}
2022-11-18 17:13:29 +01:00
precision highp float;
2023-08-05 15:03:11 +02:00
${t.attribute} vec3 clipSpacePos;
${t.attribute} vec2 uv;
${t.varyingVs} vec2 resultUV;
2022-11-18 17:13:29 +01:00
void main() {
gl_Position = vec4(clipSpacePos, 1);
resultUV = uv;
}`;return $I(r,e)}function YI(r){let t=new Float32Array([-1,1,0,0,1,-1,-1,0,0,0,1,1,0,1,1,1,-1,0,1,0]);return AI(r,t)}function QI(r){let t=new Uint16Array([0,1,2,2,1,3]);return FI(r,t)}function Ql(r,t,e,o,n,s){OI(t,e);let a=PI(r),i=r.TEXTURE_2D;return ce(r,()=>r.bindTexture(i,a)),ce(r,()=>r.texParameteri(i,r.TEXTURE_WRAP_S,r.CLAMP_TO_EDGE)),ce(r,()=>r.texParameteri(i,r.TEXTURE_WRAP_T,r.CLAMP_TO_EDGE)),ce(r,()=>r.texParameteri(i,r.TEXTURE_MIN_FILTER,r.NEAREST)),ce(r,()=>r.texParameteri(i,r.TEXTURE_MAG_FILTER,r.NEAREST)),A().getNumber("WEBGL_VERSION")===1?ce(r,()=>r.texImage2D(i,0,o,t,e,0,n,s,null)):ce(r,()=>r.texStorage2D(i,1,o,t,e)),ce(r,()=>r.bindTexture(r.TEXTURE_2D,null)),{texture:a,texShape:[e,t]}}function oh(r){return r.internalFormatFloat}function ZI(r,t,e,o){let[n,s]=fp(t,e);return Ql(r,n,s,oh(o),o.textureFormatFloat,r.FLOAT)}function nh(r){return r.internalFormatHalfFloat}function JI(r,t,e,o){let[n,s]=fp(t,e);return Ql(r,n,s,nh(o),o.textureFormatFloat,o.textureTypeHalfFloat)}function sh(r){return r.downloadTextureFormat}function ev(r,t,e,o){let[n,s]=fp(t,e);return Ql(r,n,s,sh(o),r.RGBA,r.UNSIGNED_BYTE)}function ah(r){return r.internalFormatPackedFloat}function tv(r,t,e,o){let[n,s]=Ma(t,e);return Ql(r,n,s,ah(o),r.RGBA,r.FLOAT)}function ih(r){return r.internalFormatPackedHalfFloat}function rv(r,t,e,o){let[n,s]=Ma(t,e);return Ql(r,n,s,ih(o),r.RGBA,o.textureTypeHalfFloat)}function ov(r,t,e){return ce(r,()=>r.bindBuffer(r.ARRAY_BUFFER,e)),qf(r,t,"clipSpacePos",e,3,20,0)&&qf(r,t,"uv",e,2,20,12)}function nv(r,t,e,o,n,s){ce(r,()=>r.bindTexture(r.TEXTURE_2D,t));let a,i,p;n instanceof Uint8Array?(a=new Uint8Array(e*o*4),i=r.UNSIGNED_BYTE,p=r.RGBA):(a=new Float32Array(e*o*4),i=r.FLOAT,p=s.internalFormatPackedFloat),a.set(n),A().getNumber("WEBGL_VERSION")===2?ce(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,e,o,r.RGBA,i,a)):ce(r,()=>r.texImage2D(r.TEXTURE_2D,0,p,e,o,0,r.RGBA,i,a)),ce(r,()=>r.bindTexture(r.TEXTURE_2D,null))}function 
sv(r,t,e){ce(r,()=>r.bindTexture(r.TEXTURE_2D,t)),e.data instanceof Uint8Array?A().getNumber("WEBGL_VERSION")===2?ce(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,e.width,e.height,r.RGBA,r.UNSIGNED_BYTE,e.data)):ce(r,()=>r.texImage2D(r.TEXTURE_2D,0,r.RGBA,e.width,e.height,0,r.RGBA,r.UNSIGNED_BYTE,e.data)):A().getNumber("WEBGL_VERSION")===2?ce(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,r.RGBA,r.UNSIGNED_BYTE,e)):ce(r,()=>r.texImage2D(r.TEXTURE_2D,0,r.RGBA,r.RGBA,r.UNSIGNED_BYTE,e)),ce(r,()=>r.bindTexture(r.TEXTURE_2D,null))}function av(r,t,e,o){let n=r.createBuffer();ce(r,()=>r.bindBuffer(r.PIXEL_PACK_BUFFER,n));let i=4*4*t*e;return ce(r,()=>r.bufferData(r.PIXEL_PACK_BUFFER,i,r.STREAM_READ)),ce(r,()=>r.readPixels(0,0,e,t,r.RGBA,r.FLOAT,0)),ce(r,()=>r.bindBuffer(r.PIXEL_PACK_BUFFER,null)),n}function iv(r,t,e){let o=r,n=new Float32Array(e);return o.bindBuffer(o.PIXEL_PACK_BUFFER,t),o.getBufferSubData(o.PIXEL_PACK_BUFFER,0,n),o.bindBuffer(o.PIXEL_PACK_BUFFER,null),n}function uv(r,t,e,o){let[n,s]=fp(t,e),a=4,i=new Uint8Array(xR(t*e,a));return ce(r,()=>r.readPixels(0,0,n,s,o.downloadTextureFormat,r.UNSIGNED_BYTE,i)),new Float32Array(i.buffer)}function pv(r,t,e,o,n,s,a,i){let p=r,u=new Float32Array(yR(s,a));return p.bindBuffer(p.PIXEL_PACK_BUFFER,t),p.getBufferSubData(p.PIXEL_PACK_BUFFER,0,u),p.bindBuffer(p.PIXEL_PACK_BUFFER,null),u}function cv(r,t,e){let o=new Float32Array(t*e*4);return ce(r,()=>r.readPixels(0,0,e,t,r.RGBA,r.FLOAT,o)),o}var xp=class{constructor(t){this.outputTexture=null,this.program=null,this.disposed=!1,this.itemsToPoll=[];let e=A().getNumber("WEBGL_VERSION");if(t!=null?(this.gl=t,kI(e,t)):this.gl=Hr(e),t=this.gl,A().getNumber("WEBGL_VERSION")===2){let s=t;this.createVertexArray=()=>ce(s,()=>s.createVertexArray()),this.bindVertexArray=a=>ce(s,()=>s.bindVertexArray(a)),this.deleteVertexArray=a=>ce(s,()=>s.deleteVertexArray(a)),this.getVertexArray=()=>ce(s,()=>s.getParameter(s.VERTEX_ARRAY_BINDING))}else if(t!=null){let 
s=t.getExtension("OES_vertex_array_object");if(s==null)throw new Error("All WebGL1 implementations are expected to offer OES_vertex_array_object.");this.createVertexArray=()=>c
2022-11-18 17:13:29 +01:00
void main() {
setOutput(vec4(getA(), 0., 0., 0.));
}
`;else{let e=Rt("rc",this.rank),o=Re(this.rank),n=this.getOutOfBoundsCondition(e),s=this.getSetup(e),a=this.getOutput(e);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o} rc = getOutputCoords();
if(${n}) {
setOutput(vec4(0));
} else {
${s}
setOutput(vec4(${a}));
}
}
2023-08-05 15:03:11 +02:00
`}}getSourceCoordsArr(t){let e=[];for(let o=0;o<=1;o++)for(let n=0;n<=1;n++){let s=`${o===0?"r":"rp1"}, ${n===0?"c":"cp1"}`;for(let a=2;a<this.rank;a++)s=`${t[t.length-1-a]},`+s;e.push(s)}return e}getOutOfBoundsCondition(t){if(this.rank===1)return`rc > ${this.enableShapeUniforms?"outShape":this.outputShape[0]}`;let e="";for(let o=this.rank-2;o<this.rank;o++)e+=`${t[o]} >= ${this.enableShapeUniforms?`outShape[${o}]`:this.outputShape[o]}`,o<this.rank-1&&(e+="||");return e}getSetup(t){if(this.rank===1)return"";let e=t.slice(-2),o=this.enableShapeUniforms?`outShape[${this.rank} - 1]`:this.outputShape[this.rank-1],n=this.enableShapeUniforms?`outShape[${this.rank} - 2]`:this.outputShape[this.rank-2];return`
int r = ${e[0]};
int c = ${e[1]};
2022-11-18 17:13:29 +01:00
int rp1 = r + 1;
int cp1 = c + 1;
bool cEdge = cp1 >= ${o};
bool rEdge = rp1 >= ${n};
2023-08-05 15:03:11 +02:00
`}getOutput(t){let e=this.getSourceCoordsArr(t);return this.rank===1?`getA(rc), (rc + 1 >= ${this.enableShapeUniforms?"outShape":this.outputShape[0]} ? 0. : getA(rc + 1)), 0, 0`:`getA(${e[0]}),
cEdge ? 0. : getA(${e[1]}),
rEdge ? 0. : getA(${e[2]}),
rEdge || cEdge ? 0. : getA(${e[3]})`}};var Pc=class{constructor(t,e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"inputShape",type:"ivec3"}],this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length);let o="";for(let n=0;n<4;n++){let s="thisRC = rc;";n%2===1&&(s+="thisRC.z += 1;"),n>1&&(s+="thisRC.y += 1;"),o+=`
2022-11-18 17:13:29 +01:00
${s}
${n>0?"if(thisRC.y < rows && thisRC.z < cols){":""}
int flatIndex = getFlatIndex(thisRC);
ivec3 inputRC = inputCoordsFromReshapedOutCoords(flatIndex);
vec2 inputRCInnerDims = vec2(float(inputRC.y),float(inputRC.z));
result[${n}] =
getChannel(getA(inputRC.x, inputRC.y, inputRC.z), inputRCInnerDims);
${n>0?"}":""}
`}this.userCode=`
2023-08-05 15:03:11 +02:00
${KZ(e,this.enableShapeUniforms)}
${this.enableShapeUniforms?$c():_c(t)}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 rc = getOutputCoords();
vec4 result = vec4(0.);
ivec3 thisRC;
2023-08-05 15:03:11 +02:00
int rows = ${this.enableShapeUniforms?"outShape[1]":t[1]};
int cols = ${this.enableShapeUniforms?"outShape[2]":t[2]};
2022-11-18 17:13:29 +01:00
${o}
setOutput(result);
}
2023-08-05 15:03:11 +02:00
`}};function KZ(r,t){return`
2022-11-18 17:13:29 +01:00
ivec3 inputCoordsFromReshapedOutCoords(int index) {
${t?IR(["r","c","d"],"inputShape"):Ws(["r","c","d"],r)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
`}var mh=class{constructor(t){this.gpgpu=t,this.numUsedTextures=0,this.numFreeTextures=0,this._numBytesAllocated=0,this._numBytesFree=0,this.freeTextures={},this.usedTextures={},this.logEnabled=!1}acquireTexture(t,e,o){let n=kD(e,o),s=ND(t,n,o);s in this.freeTextures||(this.freeTextures[s]=[]),s in this.usedTextures||(this.usedTextures[s]=[]);let a=vD(t,n,this.gpgpu.gl,this.gpgpu.textureConfig,o);if(this.freeTextures[s].length>0){this.numFreeTextures--,this.numUsedTextures++,this._numBytesFree-=a,this.log();let p=this.freeTextures[s].pop();return this.usedTextures[s].push(p),p}let i;return n===er.PACKED_2X2_FLOAT32?i=this.gpgpu.createPackedMatrixTexture(t[0],t[1]):n===er.PACKED_2X2_FLOAT16?i=this.gpgpu.createFloat16PackedMatrixTexture(t[0],t[1]):n===er.UNPACKED_FLOAT32?i=this.gpgpu.createFloat32MatrixTexture(t[0],t[1]):n===er.UNPACKED_FLOAT16?i=this.gpgpu.createFloat16MatrixTexture(t[0],t[1]):n===er.PACKED_4X1_UNSIGNED_BYTE&&(i=this.gpgpu.createUnsignedBytesMatrixTexture(t[0],t[1])),this.usedTextures[s].push(i),this.numUsedTextures++,this._numBytesAllocated+=a,this.log(),i}releaseTexture(t,e,o,n){if(this.freeTextures==null)return;let s=kD(o,n),a=ND(e,s,n);a in this.freeTextures||(this.freeTextures[a]=[]);let i=vD(e,s,this.gpgpu.gl,this.gpgpu.textureConfig,n),p=A().getNumber("WEBGL_DELETE_TEXTURE_THRESHOLD");p!==-1&&this._numBytesAllocated>p?(this.gpgpu.deleteMatrixTexture(t.texture),this._numBytesAllocated-=i):(this.freeTextures[a].push(t),this.numFreeTextures++,this._numBytesFree+=i),this.numUsedTextures--;let u=this.usedTextures[a],c=u&&u.indexOf(t);if(c==null||c<0)throw new Error("Cannot release a texture that was never provided by this texture manager");u[c]=u[u.length-1],u.pop(),this.log()}log(){if(!this.logEnabled)return;let t=this.numFreeTextures+this.numUsedTextures;console.log("Free/Used",`${this.numFreeTextures} / ${this.numUsedTextures}`,`(${t})`);let e=this._numBytesFree/this._numBytesAllocated;console.log(`Bytes allocated: 
${this._numBytesAllocated}`),console.log(`Bytes unused: ${this._numBytesFree} (${Math.round(100*e)}%)`)}get numBytesAllocated(){return this._numBytesAllocated}get numBytesFree(){return this._numBytesFree}getNumUsedTextures(){return this.numUsedTextures}getNumFreeTextures(){return this.numFreeTextures}dispose(){if(this.freeTextures!=null){for(let t in this.freeTextures)this.freeTextures[t].forEach(e=>{this.gpgpu.deleteMatrixTexture(e.texture)});for(let t in this.usedTextures)this.usedTextures[t].forEach(e=>{this.gpgpu.deleteMatrixTexture(e.texture)});this.freeTextures=null,this.usedTextures=null,this.numUsedTextures=0,this.numFreeTextures=0,this._numBytesAllocated=0,this._numBytesFree=0}}};function qZ(r,t){let e=r;if(t===e.R32F)return 4;if(t===e.R16F)return 2;if(t===e.RGBA32F)return 16;if(t===r.RGBA)return 16;if(t===e.RGBA16F)return 8;if(t===e.RGBA8)return 4;throw new Error(`Unknown internal format ${t}`)}function vD(r,t,e,o,n){let s=jZ(t,o),a;if(n){let[p,u]=Ma(r[0],r[1]);a=p*u}else{let[p,u]=fp(r[0],r[1]);a=p*u}let i=qZ(e,s);return a*i}function jZ(r,t){switch(r){case er.PACKED_2X2_FLOAT32:return ah(t);case er.PACKED_2X2_FLOAT16:return ih(t);case er.UNPACKED_FLOAT32:return oh(t);case er.UNPACKED_FLOAT16:return nh(t);case er.PACKED_4X1_UNSIGNED_BYTE:return sh(t);default:throw new Error(`Unknown physical texture type ${r}`)}}function XZ(r){return A().getBool("WEBGL_RENDER_FLOAT32_ENABLED")?r?er.PACKED_2X2_FLOAT32:er.UNPACKED_FLOAT32:r?er.PACKED_2X2_FLOAT16:er.UNPACKED_FLOAT16}function kD(r,t){if(r===mr.UPLOAD)return er.PACKED_2X2_FLOAT32;if(r===mr.RENDER||r==null)return XZ(t);if(r===mr.DOWNLOAD||r===mr.PIXELS)return er.PACKED_4X1_UNSIGNED_BYTE;throw new Error(`Unknown logical texture type ${r}`)}function ND(r,t,e){return`${r[0]}_${r[1]}_${t}_${e}`}var tr=class{constructor(t,e){this.variableNames=["A"],this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
float unaryOperation(float x) {
2023-08-05 15:03:11 +02:00
${e}
2022-11-18 17:13:29 +01:00
}
void main() {
float x = getAAtOutCoords();
float y = unaryOperation(x);
setOutput(y);
}
`}},Wt="if (isnan(x)) return x;",TD="return x;",dv="return abs(x);";var _D="return (x >= 0.0) ? x : (exp(x) - 1.0);",$D=Wt+`
2022-11-18 17:13:29 +01:00
return (x < 0.0) ? 0.0 : x;
`,ED=Wt+`
2022-11-18 17:13:29 +01:00
return (x < 0.0) ? 0.0 : min(6.0, x);
`,La="return x;",RD="return 1.0 / (1.0 + exp(-1.0 * x));";var AD="return x;",FD=`
2022-11-18 17:13:29 +01:00
vec4 result;
result.r = (x.r >= 0.0) ? x.r : (exp(x.r) - 1.0);
result.g = (x.g >= 0.0) ? x.g : (exp(x.g) - 1.0);
result.b = (x.b >= 0.0) ? x.b : (exp(x.b) - 1.0);
result.a = (x.a >= 0.0) ? x.a : (exp(x.a) - 1.0);
return result;
`,PD=`
2022-11-18 17:13:29 +01:00
vec4 result = x * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,OD=`
2022-11-18 17:13:29 +01:00
vec4 result = min(x, vec4(6.)) * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,MD="return 1.0 / (1.0 + exp(-1.0 * x));",Ar=class{constructor(t,e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
vec4 unaryOperation(vec4 x) {
2023-08-05 15:03:11 +02:00
${e}
2022-11-18 17:13:29 +01:00
}
void main() {
vec4 x = getAAtOutCoords();
vec4 y = unaryOperation(x);
setOutput(y);
}
`}};var dh=class{constructor(t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!1,this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length);let e=t.length,o=Rt("rc",e),n=Re(e),s=ID(e,o),a=o.slice(-2),i=e<=1?"rc":`vec2(${a.join(",")})`;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${n} rc = getOutputCoords();
vec4 packedInput = getA(${s});
setOutput(getChannel(packedInput, ${i}));
}
`}};var QZ=Vt.whereImpl,ZZ=1e-7,JZ=1e-4,fh={};function e9(r){return r in fh||(fh[r]={}),fh[r]}var t9=A().getNumber("CPU_HANDOFF_SIZE_THRESHOLD"),r9=600;function o9(){return A().global.screen==null?1024:A().global.screen.height*A().global.screen.width*window.devicePixelRatio*r9/1024/1024}var Oc=class r extends so{nextDataId(){return r.nextDataId++}constructor(t){if(super(),this.pendingRead=new WeakMap,this.pendingDisposal=new WeakSet,this.dataRefCount=new WeakMap,this.numBytesInGPU=0,this.uploadWaitMs=0,this.downloadWaitMs=0,this.lastGlFlushTime=0,this.warnedAboutMemory=!1,this.pendingDeletes=0,this.disposed=!1,!A().getBool("HAS_WEBGL"))throw new Error("WebGL is not supported on this device");let e;if(t!=null){if(t instanceof xp)e=t;else{let o=Hr(A().getNumber("WEBGL_VERSION"),t);e=new xp(o)}this.binaryCache={},this.gpgpuCreatedLocally=!1}else{let o=Hr(A().getNumber("WEBGL_VERSION"));e=new xp(o),this.binaryCache=e9(A().getNumber("WEBGL_VERSION")),this.gpgpuCreatedLocally=!0}this.gpgpu=e,this.canvas=this.gpgpu.gl.canvas,this.textureManager=new mh(this.gpgpu),this.numMBBeforeWarning=o9(),this.texData=new Bo(this,ur())}numDataIds(){return this.texData.numDataIds()-this.pendingDeletes}writeTexture(t,e,o,n,s,a){let i=this.makeTensorInfo(e,o),p=this.texData.get(i.dataId);p.isPacked=!1,p.texture={texture:t,texShape:[n,s]},p.texShape=[n,s];let u=Nc(e),c=new Yl(u,!1,a),l=this.runWebGLProgram(c,[i],o,[[n,s]]);return l.shape=e,p.texture=null,this.disposeIntermediateTensorInfo(i),l.dataId}write(t,e,o){if((A().getBool("WEBGL_CHECK_NUMERICAL_PROBLEMS")||A().getBool("DEBUG"))&&this.checkNumericalProblems(t),o==="complex64"&&t!=null)throw new Error("Cannot write to a complex64 dtype. 
Please use tf.complex(real, imag).");let n={id:this.nextDataId()};return this.texData.set(n,{shape:e,dtype:o,values:t,usage:mr.UPLOAD,refCount:1}),n}refCount(t){return this.texData.has(t)?this.texData.get(t).refCount:0}incRef(t){let e=this.texData.get(t);e.refCount++}decRef(t){if(this.texData.has(t)){let e=this.texData.get(t);e.refCount--}}move(t,e,o,n,s){if(A().getBool("DEBUG")&&this.checkNumericalProblems(e),n==="complex64")throw new Error("Cannot write to a complex64 dtype. Please use tf.complex(real, imag).");this.texData.set(t,{shape:o,dtype:n,values:e,usage:mr.UPLOAD,refCount:s})}disposeIntermediateTensorInfo(t){this.disposeData(t.dataId)}readSync(t){let e=this.texData.get(t),{values:o,dtype:n,complexTensorInfos:s,slice:a,shape:i,isPacked:p}=e;if(a!=null){let m;p?m=new Ar(i,La):m=new tr(i,La);let d=this.runWebGLProgram(m,[{dataId:t,shape:i,dtype:n}],n),f=this.readSync(d.dataId);return this.disposeIntermediateTensorInfo(d),f}if(o!=null)return this.convertAndCacheOnCPU(t);if(n==="string")return o;let u=this.activeTimers!=null,c;u&&(c=y.now());let l;if(n==="complex64"){let m=this.readSync(s.real.dataId),d=this.readSync(s.imag.dataId);l=w.mergeRealAndImagArrays(m,d)}else l=this.getValuesFromTexture(t);return u&&(this.downloadWaitMs+=y.now()-c),this.convertAndCacheOnCPU(t,l)}async read(t){if(this.pendingRead.has(t)){let f=this.pendingRead.get(t);return new Promise(h=>f.push(h))}let e=this.texData.get(t),{values:o,shape:n,slice:s,dtype:a,complexTensorInfos:i,isPacked:p}=e;if(s!=null){let f;p?f=new Ar(n,La):f=new tr(n,La);let h=this.runWebGLProgram(f,[{dataId:t,shape:n,dtype:a}],a),g=this.read(h.dataId);return this.disposeIntermediateTensorInfo(h),g}if(o!=null)return this.convertAndCacheOnCPU(t);if(A().getBool("DEBUG")&&!A().getBool("WEBGL_DOWNLOAD_FLOAT_ENABLED")&&A().getNumber("WEBGL_VERSION")===2)throw new Error("tensor.data() with WEBGL_DOWNLOAD_FLOAT_ENABLED=false and WEBGL_VERSION=2 not yet supported.");let 
u=null,c;if(a!=="complex64"&&A().get("WEBGL_BUFFER_SUPPORTED")){c=this.decode(t);let f=this.texData.get(c.dataId);u=this.gpgpu.createBufferFromTexture(f.texture.texture,...Kl(n))}this.pendingRead.set(t,[]),a!=="complex64"&&await this.gpgpu.createAndWaitForFence();let l;if(a==="complex64"){let f=await Promise.all([this.read(i.real.dataId),this.read(i.imag.dataId)]),h=f[0],g=f[1];l=w.mergeRealAndImagArrays(h,g)}else if(u==null)l=this.getValuesFrom
2022-11-18 17:13:29 +01:00
if (isnan(a)) return a;
if (isnan(b)) return b;
`;var Fr=class{constructor(t,e,o){this.variableNames=["A","B"],this.outputShape=w.assertAndGetBroadcastShape(e,o),this.enableShapeUniforms=ut(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
float binaryOperation(float a, float b) {
2023-08-05 15:03:11 +02:00
${t}
2022-11-18 17:13:29 +01:00
}
void main() {
float a = getAAtOutCoords();
float b = getBAtOutCoords();
setOutput(binaryOperation(a, b));
}
`}};var jr=`
2022-11-18 17:13:29 +01:00
result.r = isNaN.r ? NAN : result.r;
result.g = isNaN.g ? NAN : result.g;
result.b = isNaN.b ? NAN : result.b;
result.a = isNaN.a ? NAN : result.a;
`;var qr=class{constructor(t,e,o,n=!1){this.variableNames=["A","B"],this.supportsBroadcasting=!0,this.packedInputs=!0,this.packedOutput=!0,this.outputShape=w.assertAndGetBroadcastShape(e,o);let s=this.outputShape.length;this.enableShapeUniforms=ut(s);let a="";if(n)if(s===0||y.sizeFromShape(this.outputShape)===1)a=`
2022-11-18 17:13:29 +01:00
result.y = 0.;
result.z = 0.;
result.w = 0.;
`;else if(a=`
2023-05-08 15:12:41 +02:00
${Re(s)} coords = getOutputCoords();
2022-11-18 17:13:29 +01:00
`,s===1)this.enableShapeUniforms?a+=`
result.y = (coords + 1) >= outShape ? 0. : result.y;
result.z = 0.;
result.w = 0.;
`:a+=`
result.y = (coords + 1) >= ${this.outputShape[0]} ? 0. : result.y;
result.z = 0.;
result.w = 0.;
`;else{let p=Rt("coords",s);this.enableShapeUniforms?a+=`
2022-11-18 17:13:29 +01:00
bool nextRowOutOfBounds =
(${p[s-2]} + 1) >= outShape[${s} - 2];
bool nextColOutOfBounds =
(${p[s-1]} + 1) >= outShape[${s} - 1];
result.y = nextColOutOfBounds ? 0. : result.y;
result.z = nextRowOutOfBounds ? 0. : result.z;
result.w = nextColOutOfBounds || nextRowOutOfBounds ? 0. : result.w;
`:a+=`
bool nextRowOutOfBounds =
(${p[s-2]} + 1) >= ${this.outputShape[s-2]};
bool nextColOutOfBounds =
(${p[s-1]} + 1) >= ${this.outputShape[s-1]};
result.y = nextColOutOfBounds ? 0. : result.y;
result.z = nextRowOutOfBounds ? 0. : result.z;
result.w = nextColOutOfBounds || nextRowOutOfBounds ? 0. : result.w;
`}this.userCode=`
vec4 binaryOperation(vec4 a, vec4 b) {
2023-08-05 15:03:11 +02:00
${t}
2022-11-18 17:13:29 +01:00
}
void main() {
vec4 a = getAAtOutCoords();
vec4 b = getBAtOutCoords();
vec4 result = binaryOperation(a, b);
${a}
setOutput(result);
}
`}};function Dt(r){let{inputs:t,backend:e}=r,{x:o}=t;return e.incRef(o.dataId),{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}var BD={kernelName:Co,backendName:"webgl",kernelFunc:Dt};function Pr(r){let{inputs:t,backend:e}=r,{real:o,imag:n}=t,s=e.makeTensorInfo(o.shape,"complex64"),a=e.texData.get(s.dataId),i=Dt({inputs:{x:o},backend:e}),p=Dt({inputs:{x:n},backend:e});return a.complexTensorInfos={real:i,imag:p},s}var zD={kernelName:Ri,backendName:"webgl",kernelFunc:Pr};var fv="return (a < 0.) ? b * a : a;",hv=`
2022-11-18 17:13:29 +01:00
vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
`;function a9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{alpha:s}=o,a=e.makeTensorInfo([],"float32",y.createScalarValue(s,"float32")),i=A().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new qr(hv,n.shape,a.shape):new Fr(fv,n.shape,a.shape),p=e.runWebGLProgram(i,[n,a],"float32");return e.disposeIntermediateTensorInfo(a),p}var VD={kernelName:En,backendName:"webgl",kernelFunc:a9};var gv="return (a < 0.) ? b * a : a;",xv=`
2022-11-18 17:13:29 +01:00
vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
`;function i9(r){let{inputs:t,backend:e}=r,{x:o,alpha:n}=t,s=A().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new qr(xv,o.shape,n.shape):new Fr(gv,o.shape,n.shape);return e.runWebGLProgram(s,[o,n],"float32")}var WD={kernelName:rs,backendName:"webgl",kernelFunc:i9};var Fo="if (isnan(x)) return x;";function xe({opSnippet:r,packedOpSnippet:t,cpuKernelImpl:e,dtype:o}){return({inputs:n,backend:s})=>{let{x:a}=n,i=s,p=o||a.dtype;if(i.shouldExecuteOnCPU([a])&&e!=null){let l=i.texData.get(a.dataId),m=e(l.values,p);return i.makeTensorInfo(a.shape,p,m)}let u=A().getBool("WEBGL_PACK_UNARY_OPERATIONS")&&t!=null,c;return u?c=new Ar(a.shape,t):c=new tr(a.shape,r),i.runWebGLProgram(c,[a],p)}}function nt({opSnippet:r,packedOpSnippet:t,checkOutOfBounds:e=!1,supportsComplex:o=!1,cpuKernelImpl:n,dtype:s}){return({inputs:a,backend:i})=>{let{a:p,b:u}=a,c=i;if(o&&p.dtype==="complex64"){let f=c.texData.get(p.dataId),h=c.texData.get(u.dataId),[g,x]=[[f.complexTensorInfos.real,h.complexTensorInfos.real],[f.complexTensorInfos.imag,h.complexTensorInfos.imag]].map(C=>{let[S,k]=C,_={dataId:S.dataId,dtype:S.dtype,shape:p.shape},E={dataId:k.dataId,dtype:k.dtype,shape:u.shape},R=new Fr(r,p.shape,u.shape);return c.runWebGLProgram(R,[_,E],dt(S.dtype,k.dtype))}),b=Pr({inputs:{real:g,imag:x},backend:c});return c.disposeIntermediateTensorInfo(g),c.disposeIntermediateTensorInfo(x),b}let l=s||dt(p.dtype,u.dtype);if((p.dtype==="string"||u.dtype==="string"||c.shouldExecuteOnCPU([p,u]))&&n!=null){let f=c.texData.get(p.dataId).values,h=c.texData.get(u.dataId).values,g=p.dtype==="string"?w.fromUint8ToStringArray(f):f,x=p.dtype==="string"?w.fromUint8ToStringArray(h):h,[b,C]=n(p.shape,u.shape,g,x,l),S=c.makeTensorInfo(C,l),k=c.texData.get(S.dataId);return k.values=b,S}let m=A().getBool("WEBGL_PACK_BINARY_OPERATIONS")&&t!=null,d;return m?d=new qr(t,p.shape,u.shape,e):d=new Fr(r,p.shape,u.shape),c.runWebGLProgram(d,[p,u],l)}}function xi(r,t=!1){if(r==="linear")return t?AD:TD;if(r==="relu")return 
t?PD:$D;if(r==="elu")return t?FD:_D;if(r==="relu6")return t?OD:ED;if(r==="prelu")return t?xv:gv;if(r==="leakyrelu")return t?hv:fv;if(r==="sigmoid")return t?MD:RD;throw new Error(`Activation ${r} has not been implemented for the WebGL backend.`)}var Lc=class{constructor(t,e,o,n=!1,s=!1,a=!1,i=null,p=!1,u=!1){this.variableNames=["matrixA","matrixB"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=o,this.enableShapeUniforms=ut(this.outputShape.length);let c=n?t[1]:t[2],l=Math.ceil(c/2),m=n?"i * 2, rc.y":"rc.y, i * 2",d=s?"rc.z, i * 2":"i * 2, rc.z",f=n?["a.xxyy","a.zzww"]:["a.xxzz","a.yyww"],h=s?["b.xzxz","b.ywyw"]:["b.xyxy","b.zwzw"],g="",x="";i&&(p?g=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getPreluActivationWeightsAtOutCoords();
${i}
}`:u?g=`vec4 activation(vec4 a) {
vec4 b = getLeakyreluAlphaAtOutCoords();
${i}
}`:g=`vec4 activation(vec4 x) {
${i}
2023-08-05 15:03:11 +02:00
}`,x="result = activation(result);");let b=a?"result += getBiasAtOutCoords();":"";a&&this.variableNames.push("bias"),p&&this.variableNames.push("preluActivationWeights"),u&&this.variableNames.push("leakyreluAlpha");let C="rc.x",S="rc.x";t[0]<e[0]?C=`imod(rc.x, ${t[0]})`:e[0]<t[0]&&(S=`imod(rc.x, ${e[0]})`),this.userCode=`
2022-11-18 17:13:29 +01:00
${g}
// Don't use uniform for sharedDimensionPacked for performance.
const float sharedDimension = ${l}.0;
vec4 dot2x2ARowBCol(ivec3 rc) {
vec4 result = vec4(0);
int batchA = ${C};
2023-05-08 15:12:41 +02:00
int batchB = ${S};
2022-11-18 17:13:29 +01:00
for (int i = 0; i < ${l}; i++) {
vec4 a = getMatrixA(batchA, ${m});
2022-11-20 22:20:02 +01:00
vec4 b = getMatrixB(batchB, ${d});
2022-11-18 17:13:29 +01:00
// These swizzled products need to be separately added.
// See: https://github.com/tensorflow/tfjs/issues/1735
2022-11-20 22:20:02 +01:00
result += (${f[0]} * ${h[0]});
result += (${f[1]} * ${h[1]});
2022-11-18 17:13:29 +01:00
}
return result;
}
void main() {
ivec3 rc = getOutputCoords();
vec4 result = dot2x2ARowBCol(rc);
${b}
2022-11-20 22:20:02 +01:00
${x}
2022-11-18 17:13:29 +01:00
setOutput(result);
}
`}};var yv={REAL:"return areal * breal - aimag * bimag;",IMAG:"return areal * bimag + aimag * breal;"},Zl=class{constructor(t,e,o){this.variableNames=["AReal","AImag","BReal","BImag"],this.outputShape=w.assertAndGetBroadcastShape(e,o),this.userCode=`
2022-11-18 17:13:29 +01:00
float binaryOpComplex(
float areal, float aimag, float breal, float bimag) {
2023-08-05 15:03:11 +02:00
${t}
2022-11-18 17:13:29 +01:00
}
void main() {
float areal = getARealAtOutCoords();
float aimag = getAImagAtOutCoords();
float breal = getBRealAtOutCoords();
float bimag = getBImagAtOutCoords();
setOutput(binaryOpComplex(areal, aimag, breal, bimag));
}
`}};var UD="return a * b;";function Jl(r){let{inputs:t,backend:e}=r,{a:o,b:n}=t,s=w.upcastType(o.dtype,n.dtype);if(o.dtype==="complex64"){let i=e.texData.get(o.dataId),p=e.texData.get(n.dataId),u=new Zl(yv.REAL,o.shape,n.shape),c=new Zl(yv.IMAG,o.shape,n.shape),l=[{dataId:i.complexTensorInfos.real.dataId,dtype:i.complexTensorInfos.real.dtype,shape:o.shape},{dataId:i.complexTensorInfos.imag.dataId,dtype:i.complexTensorInfos.imag.dtype,shape:o.shape},{dataId:p.complexTensorInfos.real.dataId,dtype:p.complexTensorInfos.real.dtype,shape:n.shape},{dataId:p.complexTensorInfos.imag.dataId,dtype:p.complexTensorInfos.imag.dtype,shape:n.shape}],m=e.runWebGLProgram(u,l,"float32"),d=e.runWebGLProgram(c,l,"float32"),f=Pr({inputs:{real:m,imag:d},backend:e});return e.disposeIntermediateTensorInfo(m),e.disposeIntermediateTensorInfo(d),f}if(e.shouldExecuteOnCPU([o,n])){let i=e.texData.get(o.dataId),p=e.texData.get(n.dataId),[u,c]=JR(o.shape,n.shape,i.values,p.values,s),l=e.makeTensorInfo(c,s),m=e.texData.get(l.dataId);return m.values=u,l}let a;return A().getBool("WEBGL_PACK_BINARY_OPERATIONS")?a=new qr(UD,o.shape,n.shape):a=new Fr(UD,o.shape,n.shape),e.runWebGLProgram(a,[o,n],s)}var GD={kernelName:Xn,backendName:"webgl",kernelFunc:Jl};function HD(r,t,e){let o=[hi(r.shape),...gi(r.shape)],n={dtype:r.dtype,shape:o,dataId:r.dataId},s=[hi(t),...gi(t)],a=new Pc(s,o),i=!0,p=[o],u=e.runWebGLProgram(a,[n],r.dtype,p,i);return{dataId:u.dataId,shape:t,dtype:u.dtype}}function te(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{shape:s}=o,a=e,i=y.sizeFromShape(n.shape),p=y.inferFromImplicitShape(s,i),u=y.sizeFromShape(p);y.assert(i===u,()=>`The new shape (${p}) has ${u} elements and the old shape (${n.shape}) has ${i} elements. 
The new shape and old shape must have the same number of elements.`);let c=a.texData.get(n.dataId);return c.isPacked&&!hu(n.shape,p)&&!(c.texture!==null&&hu(c.shape,p))?HD(n,p,a):(a.incRef(n.dataId),{dataId:n.dataId,shape:p,dtype:n.dtype})}var KD={kernelName:da,backendName:"webgl",kernelFunc:te};var em=class{constructor(t,e){this.variableNames=["x"];let{windowSize:o,batchSize:n,inSize:s,outSize:a}=t;this.outputShape=[n,a];let i=Math.floor(o/4)*4,p=o%4,u="sumValue += dot(values, ones);";if(e!=null){let l=1/e;u=`sumValue += dot(values * ${y.isInt(l)?l.toPrecision(2):l}, ones);`}let c="";s%o>0&&(c=`
2022-11-18 17:13:29 +01:00
if (inIdx < 0 || inIdx >= ${s}) {
return 0.0;
}
`),this.userCode=`
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float getValue(int batch, int inIdx) {
${c}
return getX(batch, inIdx);
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${o};
float sumValue = 0.0;
for (int i = 0; i < ${i}; i += 4) {
int inIdx = inOffset + i;
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
getValue(batch, inIdx + 3)
);
${u}
}
int inIdx = inOffset + ${i};
if (${p===1}) {
vec4 values = vec4(getValue(batch, inIdx), 0.0, 0.0, 0.0);
${u}
} else if (${p===2}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1), 0.0, 0.0);
${u}
} else if (${p===3}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2), 0.0);
${u}
}
setOutput(sumValue);
}
`}};var hh=class{constructor(t,e){this.variableNames=["x"];let{windowSize:o,batchSize:n,inSize:s,outSize:a}=t;this.outputShape=[n,a];let i="0.0",p="";e==="prod"?i="1.0":e==="min"?(i="1.0 / 1e-20",p="min"):e==="max"&&(i="-1.0 / 1e-20",p="max");let u=`${e}(${e}(${e}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;e==="sum"?u="sumValue":e==="prod"?u="prodValue":e==="all"?u="allValue":e==="any"&&(u="anyValue");let c=Math.floor(o/4)*4,l=o%4,m=`
2023-08-05 15:03:11 +02:00
if (${e==="sum"}) {
2022-11-18 17:13:29 +01:00
sumValue += dot(values, ones);
2023-08-05 15:03:11 +02:00
} else if (${e==="prod"}) {
2022-11-18 17:13:29 +01:00
vec2 tmp = vec2(values[0], values[1]) * vec2(values[2], values[3]);
prodValue *= tmp[0] * tmp[1];
} else {
minMaxValue = ${p}(values, minMaxValue);
2023-08-05 15:03:11 +02:00
if (${e==="min"} || ${e==="max"}) {
2022-11-18 17:13:29 +01:00
minMaxValue = ${p}(values, minMaxValue);
bvec4 isNaN = isnan(values);
if (isNaN.r || isNaN.g || isNaN.b || isNaN.a) {
minMaxValue = vec4(NAN);
}
}
}
2023-08-05 15:03:11 +02:00
`,d="vec4";e==="all"?(i="1.0",m=`
2022-11-18 17:13:29 +01:00
bool reducedAllValue = all(values);
float floatedReducedAllValue = float(reducedAllValue);
allValue = float(allValue >= 1.0 && floatedReducedAllValue >= 1.0);
2023-08-05 15:03:11 +02:00
`,d="bvec4"):e==="any"&&(i="0.0",m=`
2022-11-18 17:13:29 +01:00
bool reducedAnyValue = any(values);
float floatedReducedAnyValue = float(reducedAnyValue);
anyValue = float(anyValue >= 1.0 || floatedReducedAnyValue >= 1.0);
2022-11-20 22:20:02 +01:00
`,d="bvec4");let f="";s%o>0&&(f=`
2022-11-18 17:13:29 +01:00
if (inIdx < 0 || inIdx >= ${s}) {
return initializationValue;
}
`),this.userCode=`
const float initializationValue = ${i};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float getValue(int batch, int inIdx) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
return getX(batch, inIdx);
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${o};
vec4 minMaxValue = vec4(${i});
float prodValue = 1.0;
float sumValue = 0.0;
float allValue = 1.0;
float anyValue = 0.0;
for (int i = 0; i < ${c}; i += 4) {
int inIdx = inOffset + i;
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
getValue(batch, inIdx + 3)
);
${m}
}
int inIdx = inOffset + ${c};
if (${l===1}) {
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
initializationValue,
initializationValue,
initializationValue
);
${m}
} else if (${l===2}) {
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
initializationValue,
initializationValue
);
${m}
} else if (${l===3}) {
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
initializationValue
);
${m}
}
setOutput(${u});
}
`}};function p9(r){let t=[];for(;t.length===0||t[t.length-1].outSize!==1;){let e=t.length?t[t.length-1].outSize:r[1],o=w.computeOptimalWindowSize(e);t.push({inSize:e,windowSize:o,outSize:Math.ceil(e/o)})}return t}function Xr(r,t,e,o){let n=p9(r.shape),s=r;for(let a=0;a<n.length;a++){let{inSize:i,windowSize:p,outSize:u}=n[a],c,l;e==="mean"?c=a===0?new em({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u},i):new em({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u}):c=new hh({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u},e),l=s,s=o.runWebGLProgram(c,[s],t),l.dataId!==r.dataId&&o.disposeIntermediateTensorInfo(l)}return s}var gh=class{constructor(t,e){this.variableNames=["A"];let o=new Array(t.length);for(let a=0;a<o.length;a++)o[a]=t[e[a]];this.outputShape=o,this.rank=o.length;let n=Re(this.rank),s=c9(e);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${n} resRC = getOutputCoords();
setOutput(getA(${s}));
}
`}};function c9(r){let t=r.length;if(t>6)throw Error(`Transpose for rank ${t} is not yet supported`);let e=["resRC.x","resRC.y","resRC.z","resRC.w","resRC.u","resRC.v"],o=new Array(t);for(let n=0;n<r.length;n++)o[r[n]]=e[n];return o.join()}var xh=class{constructor(t,e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0;let o=new Array(t.length);for(let c=0;c<o.length;c++)o[c]=t[e[c]];if(this.outputShape=o,this.rank=o.length,this.rank>6)throw Error(`Packed transpose for rank ${this.rank} is not yet supported.`);let n=Re(this.rank),s=mv("rc",this.rank),a=new Array(this.rank);for(let c=0;c<e.length;c++)a[e[c]]=s[c];let i=`vec2(${a.slice(-2).join()})`,p=`++${s[this.rank-1]} < ${o[this.rank-1]}`,u=`getChannel(getA(${a.join()}), ${i})`;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${n} rc = getOutputCoords();
vec4 result = vec4(0.);
result[0] = ${u};
if(${p}) {
result[1] = ${u};
}
--${s[this.rank-1]};
if(++${s[this.rank-2]} < ${o[this.rank-2]}) {
result[2] = ${u};
if(${p}) {
result[3] = ${u};
}
}
setOutput(result);
}
`}};function gu(r,t,e){let o=A().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new xh(r.shape,t):new gh(r.shape,t);return e.runWebGLProgram(o,[r],r.dtype)}function qD(r,t,e,o){let n=t,s=r.shape.length,a=y.parseAxisParam(n,r.shape),i=a,p=w.getAxesPermutation(i,s),u=p!=null,c=r;u&&(c=gu(r,p,o),i=w.getInnerMostAxes(i.length,s)),w.assertAxesAreInnerMostDims("sum",i,s);let[l,m]=w.computeOutAndReduceShapes(c.shape,i),d=l;e&&(d=w.expandShapeToKeepDim(l,a));let f=y.sizeFromShape(m),g=y.sizeFromShape(r.shape)/f,x=te({inputs:{x:c},attrs:{shape:[g,f]},backend:o}),b=oi(r.dtype),C=Xr(x,b,"sum",o),S=te({inputs:{x:C},attrs:{shape:d},backend:o});return o.disposeIntermediateTensorInfo(x),o.disposeIntermediateTensorInfo(C),u&&o.disposeIntermediateTensorInfo(c),S}function bp(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o;return qD(n,s,a,e)}var jD={kernelName:Ss,backendName:"webgl",kernelFunc:bp};function bt(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{perm:s}=o,a=e,i=n.shape.length,p=new Array(i);for(let c=0;c<p.length;c++)p[c]=n.shape[s[c]];let u;if(a.shouldExecuteOnCPU([n])){let l=a.texData.get(n.dataId).values,m=yp(l,n.shape,n.dtype,s,p);u=a.makeTensorInfo(p,n.dtype);let d=a.texData.get(u.dataId);d.values=m}else u=gu(n,s,a);return u}var XD={kernelName:po,backendName:"webgl",kernelFunc:bt};var bv=1e3;function Cp({a:r,b:t,transposeA:e,transposeB:o,backend:n,bias:s=null,preluActivationWeights:a=null,leakyreluAlpha:i=0,activation:p=null}){let u=r.shape.length,c=t.shape.length,l=e?r.shape[u-2]:r.shape[u-1],m=o?t.shape[c-1]:t.shape[c-2],d=e?r.shape[u-1]:r.shape[u-2],f=o?t.shape[c-2]:t.shape[c-1],h=r.shape.slice(0,-2),g=t.shape.slice(0,-2),x=y.sizeFromShape(h),b=y.sizeFromShape(g),S=Sr.assertAndGetBroadcastShape(r.shape.slice(0,-2),t.shape.slice(0,-2)).concat([d,f]);y.assert(l===m,()=>`Error in matMul: inner shapes (${l}) and (${m}) of Tensors with shapes ${r.shape} and ${t.shape} and transposeA=${e} and transposeB=${o} must match.`);let 
k=e?[x,l,d]:[x,d,l],_=o?[b,f,m]:[b,m,f],E=te({inputs:{x:r},backend:n,attrs:{shape:k}}),R=te({inputs:{x:t},backend:n,attrs:{shape:_}}),D=[E,R],P=Math.max(x,b),O=e?E.shape[1]:E.shape[2],M=s!=null,L=a!=null,B=p==="leakyrelu",z=p!=null?xi(p,!0):null,U=M||L||B||z!=null,j;if((d===1||f===1)&&O>bv&&U===!1){let Y=E,J=R;e&&(Y=bt({inputs:{x:E},backend:n,attrs:{perm:[0,2,1]}}),D.push(Y)),o&&(J=bt({inputs:{x:R},backend:n,attrs:{perm:[0,2,1]}}),D.push(J));let re=f!==1,ne=f===1,ee=Y;re&&(ee=te({inputs:{x:Y},backend:n,attrs:{shape:[P,O,1]}}),D.push(ee));let oe=f===1?2:1,ie=J;ne&&(ie=te({inputs:{x:J},backend:n,attrs:{shape:[P,1,O]}}),D.push(ie));let le=Jl({inputs:{a:ee,b:ie},backend:n});j=bp({inputs:{x:le},backend:n,attrs:{axis:oe,keepDims:!0}}),D.push(le)}else{let Y=dt(r.dtype,t.dtype),J=new Lc(k,_,[P,d,f],e,o,M,z,L,B),re=[E,R];if(s!=null&&re.push(s),L&&re.push(a),B){let ne=n.makeTensorInfo([],"float32",y.createScalarValue(i,"float32"));re.push(ne),D.push(ne)}j=n.runWebGLProgram(J,re,Y)}let q=te({inputs:{x:j},backend:n,attrs:{shape:S}});D.push(j);for(let Y of D)n.disposeIntermediateTensorInfo(Y);return q}function l9(r){let{inputs:t,backend:e,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=t,{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o;return Cp({a:n,b:s,transposeA:p,transposeB:u,backend:e,bias:a,preluActivationWeights:i,leakyreluAlpha:l,activation:c})}var YD={kernelName:So,backendName:"webgl",kernelFunc:l9};var QD="return abs(x);";function m9(r){let{inputs:t,backend:e}=r,{x:o}=t;if(e.shouldExecuteOnCPU([o])&&o.dtype!=="complex64"){let s=e.texData.get(o.dataId),a=ph(s.values);return e.makeTensorInfo(o.shape,o.dtype,a)}let n;return A().getBool("WEBGL_PACK_UNARY_OPERATIONS")?n=new Ar(o.shape,QD):n=new tr(o.shape,QD),e.runWebGLProgram(n,[o],o.dtype)}var ZD={kernelName:Xs,backendName:"webgl",kernelFunc:m9};var d9=Wt+`
2022-11-18 17:13:29 +01:00
if (abs(x) > 1.) {
return NAN;
}
return acos(x);
`,f9=xe({opSnippet:d9}),JD={kernelName:Vo,backendName:"webgl",kernelFunc:f9};var h9=Wt+`
2022-11-18 17:13:29 +01:00
if (x < 1.0) return NAN;
return log(x + sqrt(x * x - 1.0));`,g9=xe({opSnippet:h9}),eA={kernelName:Wo,backendName:"webgl",kernelFunc:g9};var tA="return a + b;",x9=nt({opSnippet:tA,packedOpSnippet:tA,supportsComplex:!0,cpuKernelImpl:DR}),rA={kernelName:io,backendName:"webgl",kernelFunc:x9};var yh=class{constructor(t,e){this.outputShape=[],this.outputShape=t,this.variableNames=e.map((s,a)=>`T${a}`);let o=[];this.variableNames.forEach(s=>{o.push(`float v${s} = get${s}AtOutCoords();`)});let n=this.variableNames.map(s=>`v${s}`).join(" + ");this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o.join(`
`)}
float result = ${n};
setOutput(result);
}
`}};var bh=class{constructor(t,e){this.outputShape=[],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=t,this.variableNames=e.map((s,a)=>`T${a}`);let o=[];this.variableNames.forEach(s=>{o.push(`vec4 v${s} = get${s}AtOutCoords();`)});let n=this.variableNames.map(s=>`v${s}`).join(" + ");this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o.join(`
`)}
vec4 result = ${n};
setOutput(result);
}
`}};function Ch(r){let{inputs:t,backend:e}=r,o=t;if(o.length===1)return Dt({inputs:{x:o[0]},backend:e});if(o.length>A().getNumber("WEBGL_MAX_TEXTURES_IN_SHADER")){let p=Math.floor(o.length/2),u=Ch({inputs:o.slice(0,p),backend:e}),c=Ch({inputs:o.slice(p),backend:e});return Ch({inputs:[u,c],backend:e})}let n=o.map(p=>p.dtype).reduce((p,u)=>dt(p,u)),s=o.map(p=>p.shape),i=A().getBool("WEBGL_PACK")?new bh(o[0].shape,s):new yh(o[0].shape,s);return e.runWebGLProgram(i,o,n)}var oA={kernelName:Uo,backendName:"webgl",kernelFunc:Ch};function y9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=w.getAxesPermutation(u,i),l=n;c!=null&&(l=bt({inputs:{x:n},backend:e,attrs:{perm:c}}),u=w.getInnerMostAxes(u.length,i)),w.assertAxesAreInnerMostDims("all",u,i);let[m,d]=w.computeOutAndReduceShapes(l.shape,u),f=y.sizeFromShape(d),h=te({inputs:{x:l},backend:e,attrs:{shape:[-1,f]}}),g=Xr(h,h.dtype,"all",e),x;if(a){let b=w.expandShapeToKeepDim(m,p);x=te({inputs:{x:g},backend:e,attrs:{shape:b}})}else x=te({inputs:{x:g},backend:e,attrs:{shape:m}});return e.disposeIntermediateTensorInfo(h),e.disposeIntermediateTensorInfo(g),c!=null&&e.disposeIntermediateTensorInfo(l),x}var nA={kernelName:Go,backendName:"webgl",kernelFunc:y9};function b9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=w.getAxesPermutation(u,i),l=n;c!=null&&(l=bt({inputs:{x:n},backend:e,attrs:{perm:c}}),u=w.getInnerMostAxes(u.length,i)),w.assertAxesAreInnerMostDims("any",u,i);let[m,d]=w.computeOutAndReduceShapes(l.shape,u),f=y.sizeFromShape(d),h=te({inputs:{x:l},backend:e,attrs:{shape:[-1,f]}}),g=Xr(h,h.dtype,"any",e),x;if(a){let b=w.expandShapeToKeepDim(m,p);x=te({inputs:{x:g},backend:e,attrs:{shape:b}})}else x=te({inputs:{x:g},backend:e,attrs:{shape:m}});return e.disposeIntermediateTensorInfo(h),e.disposeIntermediateTensorInfo(g),c!=null&&e.disposeIntermediateTensorInfo(l),x}var 
sA={kernelName:Ho,backendName:"webgl",kernelFunc:b9};var wh=class{constructor(t,e,o){this.variableNames=["A"];let{windowSize:n,batchSize:s,outSize:a}=t;o||this.variableNames.push("bestIndicesA"),this.outputShape=[s,a];let i=e==="max"?">":"<",p=o?"inOffset + i;":"round(getBestIndicesA(batch, inOffset + i));";this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${n};
int bestIndex = inOffset;
float bestValue = getA(batch, bestIndex);
for (int i = 0; i < ${n}; i++) {
int inIdx = ${p};
float candidate = getA(batch, inIdx);
if (candidate ${i} bestValue) {
bestValue = candidate;
bestIndex = inIdx;
}
}
setOutput(float(bestIndex));
}
`}};var Sh=class{constructor(t,e,o,n){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,y.assert(t.length>2,()=>`Packed arg${o.charAt(0).toUpperCase()+o.slice(1)} supports only inputs with rank above 2.`);let s=t[t.length-1],a=Math.ceil(s/e);this.outputShape=t.slice(0,-1),a>1&&this.outputShape.push(a),n||this.variableNames.push("bestIndicesA");let i=this.outputShape,p=i.length,u=Re(p),c=Rt("coords",p),l,m;if(a===1){m=p+1;let R=Re(m);l=`
2023-05-08 15:12:41 +02:00
${R} sourceLocR = ${R}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
++${c[p-1]};
2023-05-08 15:12:41 +02:00
${R} sourceLocG = ${R}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
++${c[p-2]};
2023-05-08 15:12:41 +02:00
${R} sourceLocA = ${R}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
--${c[p-1]};
2023-05-08 15:12:41 +02:00
${R} sourceLocB = ${R}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
--${c[p-2]};`}else m=p,l=`
${u} sourceLocR = coords;
++${c[p-1]};
${u} sourceLocG = coords;
++${c[p-2]};
${u} sourceLocA = coords;
--${c[p-1]};
${u} sourceLocB = coords;
--${c[p-2]};`;let d=["x","y","z","w","u","v"].slice(0,m),f="."+d[m-1],h=d.map(R=>"int "+R),g=Rt("sourceLocR",m-1).concat("inIdx.r"),x=Rt("sourceLocG",m-1).concat("inIdx.g"),b=Rt("sourceLocB",m-1).concat("inIdx.b"),C=Rt("sourceLocA",m-1).concat("inIdx.a"),S=o==="max"?"greaterThan":"lessThan",k=n?"":`
2022-11-18 17:13:29 +01:00
inIdx = round(vec4(getBestIndicesAChannel(${g.join()}),
2022-11-20 22:20:02 +01:00
getBestIndicesAChannel(${x.join()}),
2022-11-18 17:13:29 +01:00
getBestIndicesAChannel(${b.join()}),
getBestIndicesAChannel(${C.join()})));`,_=`vec4(
2022-11-18 17:13:29 +01:00
getAChannel(${g.join()}),
2022-11-20 22:20:02 +01:00
hasNextCol ? getAChannel(${x.join()}) : 0.,
2022-11-18 17:13:29 +01:00
hasNextRow ? getAChannel(${b.join()}) : 0.,
hasNextRow && hasNextCol ? getAChannel(${C.join()}) : 0.)`,E=n?"":`
2022-11-18 17:13:29 +01:00
float getBestIndicesAChannel(${h.join()}) {
2022-11-20 22:20:02 +01:00
return getChannel(getBestIndicesA(${d.join()}),
vec2(${d.slice(-2).join()}));
2022-11-18 17:13:29 +01:00
}`;this.userCode=`
float getAChannel(${h.join()}) {
2022-11-20 22:20:02 +01:00
return getChannel(getA(${d.join()}),
vec2(${d.slice(-2).join()}));
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
void main() {
${u} coords = getOutputCoords();
bool hasNextCol = ${c[p-1]} < ${i[p-1]-1};
bool hasNextRow = ${c[p-2]} < ${i[p-2]-1};
${l}
2022-11-20 22:20:02 +01:00
ivec4 srcIdx = ivec4(sourceLocR${f}, sourceLocG${f},
2023-08-05 15:03:11 +02:00
sourceLocB${f}, sourceLocA${f}) * ${e};
2022-11-18 17:13:29 +01:00
ivec4 inIdx = srcIdx;
vec4 bestIndex = vec4(inIdx);
vec4 bestValue = ${_};
2023-08-05 15:03:11 +02:00
for (int i = 0; i < ${e}; i++) {
2022-11-18 17:13:29 +01:00
inIdx = srcIdx;
${k}
vec4 candidate = ${_};
bvec4 nan = isnan(candidate);
bvec4 replace = bvec4(
2023-05-08 15:12:41 +02:00
vec4(${S}(candidate, bestValue)) * (vec4(1.0) - vec4(nan)));
2022-11-18 17:13:29 +01:00
bestValue = vec4(replace.x ? candidate.x : bestValue.x,
replace.y ? candidate.y : bestValue.y,
replace.z ? candidate.z : bestValue.z,
replace.w ? candidate.w : bestValue.w);
bestIndex = mix(bestIndex, vec4(inIdx), vec4(replace));
srcIdx++;
}
setOutput(bestIndex);
}
`}};function aA(r,t,e,o=null){let n=t.shape[0],s=t.shape[1];o!=null&&(n=o.shape[0],s=o.shape[1]);let a=w.computeOptimalWindowSize(s),i={windowSize:a,inSize:s,batchSize:n,outSize:Math.ceil(s/a)},p=new wh(i,e,o==null),u=[t];o!=null&&u.push(o);let c=r.runWebGLProgram(p,u,"int32");if(c.shape[1]===1)return c;let l=aA(r,t,e,c);return r.disposeIntermediateTensorInfo(c),l}function iA(r,t,e,o=null){let n=o!=null?o.shape:t.shape,s=n[n.length-1],a=w.computeOptimalWindowSize(s),i=new Sh(n,a,e,o==null),p=o==null?[t]:[t,o],u=r.runWebGLProgram(i,p,"int32");if(u.shape.length===t.shape.length){let c=iA(r,t,e,u);return r.disposeIntermediateTensorInfo(u),c}return u}function Ih(r,t,e,o){let n=[e];if(w.assertAxesAreInnerMostDims("arg"+o.charAt(0).toUpperCase()+o.slice(1),n,t.shape.length),!A().getBool("WEBGL_PACK_REDUCE")||t.shape.length<=2){let s=[],a=r.texData.get(t.dataId),i=a!==null&&a.isPacked,p=t;i&&(p=r.unpackTensor(t),s.push(p));let[u,c]=w.computeOutAndReduceShapes(p.shape,n),l=y.sizeFromShape(c),m=te({inputs:{x:p},backend:r,attrs:{shape:[-1,l]}});s.push(m);let d=aA(r,m,o);s.push(d);let f=te({inputs:{x:d},backend:r,attrs:{shape:u}});return s.forEach(h=>r.disposeIntermediateTensorInfo(h)),f}return iA(r,t,o)}function C9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=w.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=bt({inputs:{x:n},backend:e,attrs:{perm:i}}),u.push(p),a=w.getInnerMostAxes(a.length,p.shape.length)),w.assertAxesAreInnerMostDims("argMax",[a[0]],p.shape.length);let c=Ih(e,p,a[0],"max");return u.forEach(l=>e.disposeIntermediateTensorInfo(l)),c}var uA={kernelName:Ys,backendName:"webgl",kernelFunc:C9};function 
w9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=w.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=bt({inputs:{x:n},backend:e,attrs:{perm:i}}),u.push(p),a=w.getInnerMostAxes(a.length,p.shape.length)),w.assertAxesAreInnerMostDims("argMin",[a[0]],p.shape.length);let c=Ih(e,p,a[0],"min");return u.forEach(l=>e.disposeIntermediateTensorInfo(l)),c}var pA={kernelName:Qs,backendName:"webgl",kernelFunc:w9};var S9=Wt+`
2022-11-18 17:13:29 +01:00
if (abs(x) > 1.) {
return NAN;
}
return asin(x);
`,I9=xe({opSnippet:S9}),cA={kernelName:Ko,backendName:"webgl",kernelFunc:I9};var v9=Wt+"return log(x + sqrt(x * x + 1.0));",k9=xe({opSnippet:v9}),lA={kernelName:qo,backendName:"webgl",kernelFunc:k9};var N9=Wt+`
2022-11-18 17:13:29 +01:00
return atan(x);
`,T9=xe({opSnippet:N9}),mA={kernelName:jo,backendName:"webgl",kernelFunc:T9};var _9=Mc+`
2022-11-18 17:13:29 +01:00
return atan(a, b);
2023-08-05 15:03:11 +02:00
`,$9=`
2022-11-18 17:13:29 +01:00
vec4 result = atan(a, b);
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
`+jr+`
2022-11-18 17:13:29 +01:00
return result;
`,E9=nt({opSnippet:_9,packedOpSnippet:$9}),dA={kernelName:Yo,backendName:"webgl",kernelFunc:E9};var R9=Wt+`
2022-11-18 17:13:29 +01:00
if ((x < -1.0) || (x > 1.0)) return NAN;
return (log(1.0 + x) - log(1.0 - x)) / 2.0;`,D9=xe({opSnippet:R9}),fA={kernelName:Xo,backendName:"webgl",kernelFunc:D9};var Us=class{constructor(t,e,o,n=!1,s=!1){if(this.variableNames=["x"],e==="avg"&&o)throw new Error("Cannot compute positions for average pool.");let a=t.filterWidth,i=t.strideHeight,p=t.strideWidth,u=t.dilationHeight,c=t.dilationWidth,l=t.effectiveFilterHeight,m=t.effectiveFilterWidth,d=t.padInfo.top,f=t.padInfo.left;this.outputShape=t.outShape;let h=e==="avg",g=`((batch * ${t.inHeight} + xR) * ${t.inWidth} + xC) * ${t.inChannels} + d`,x=`(xR * ${t.inWidth} + xC) * ${t.inChannels} + d`,b="0.0";if(h||(b="-1.0 / 1e-20"),o){let R=">=";this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 strides = ivec2(${i}, ${p});
2022-11-20 22:20:02 +01:00
const ivec2 pads = ivec2(${d}, ${f});
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d = coords[3];
ivec2 xRCCorner = coords.yz * strides - pads;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// max/min x(?, ?, d) to get y(yR, yC, d).
// ? = to be determined
float minMaxValue = 0.0;
float minMaxValueFound = 0.0;
int minMaxPosition = 0;
float avgValue = 0.0;
for (int wR = 0; wR < ${l};
wR += ${u}) {
int xR = xRCorner + wR;
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
for (int wC = 0; wC < ${m};
wC += ${c}) {
int xC = xCCorner + wC;
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
continue;
}
float value = getX(batch, xR, xC, d);
// If a min / max value has already been found, use it. If not,
// use the current value.
float currMinMaxValue = mix(
value, minMaxValue, minMaxValueFound);
2023-05-08 15:12:41 +02:00
if (value ${R} currMinMaxValue) {
2022-11-18 17:13:29 +01:00
minMaxValue = value;
minMaxValueFound = 1.0;
2022-11-20 22:20:02 +01:00
minMaxPosition = ${n?s?g:x:`wR * ${m} + wC`};
2022-11-18 17:13:29 +01:00
}
}
}
setOutput(float(minMaxPosition));
}
2023-08-05 15:03:11 +02:00
`;return}let C="max",S=`${e}(${e}(${e}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;e==="avg"&&(S="avgValue / max(count, 1.0)");let k=Math.floor(a/4)*4,_=a%4,E=`
2022-11-18 17:13:29 +01:00
if (${h}) {
avgValue += dot(values, ones);
} else {
minMaxValue = ${C}(values, minMaxValue);
2022-11-18 17:13:29 +01:00
}
`;this.userCode=`
const ivec2 strides = ivec2(${i}, ${p});
2022-11-20 22:20:02 +01:00
const ivec2 pads = ivec2(${d}, ${f});
2022-11-18 17:13:29 +01:00
const float initializationValue = ${b};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float count = 0.0;
float getValue(int batch, int xR, int xC, int d) {
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
return initializationValue;
}
count += 1.0;
return getX(batch, xR, xC, d);
}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d = coords[3];
ivec2 xRCCorner = coords.yz * strides - pads;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// max/min x(?, ?, d) to get y(yR, yC, d).
// ? = to be determined
vec4 minMaxValue = vec4(${b});
float avgValue = 0.0;
count = 0.0;
for (int wR = 0; wR < ${l};
wR += ${u}) {
int xR = xRCorner + wR;
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
for (int wC = 0; wC < ${k}; wC += 4) {
int xC = xCCorner + wC * ${c};
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
getValue(batch, xR, xC + 2 * ${c}, d),
getValue(batch, xR, xC + 3 * ${c}, d)
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
}
int xC = xCCorner + ${k};
if (${_===1}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
initializationValue,
initializationValue,
initializationValue
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
} else if (${_===2}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
initializationValue,
initializationValue
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
} else if (${_===3}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
getValue(batch, xR, xC + 2 * ${c}, d),
initializationValue
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
}
}
2023-05-08 15:12:41 +02:00
setOutput(${S});
2022-11-18 17:13:29 +01:00
}
`}},xu=class{constructor(t,e,o,n=!1,s=!1){if(this.variableNames=["x"],e==="avg"&&o)throw new Error("Cannot compute positions for average pool.");let a=t.filterWidth,i=t.strideDepth,p=t.strideHeight,u=t.strideWidth,c=t.dilationDepth,l=t.dilationHeight,m=t.dilationWidth,d=t.effectiveFilterDepth,f=t.effectiveFilterHeight,h=t.effectiveFilterWidth,g=t.padInfo.front,x=t.padInfo.top,b=t.padInfo.left;this.outputShape=t.outShape;let C=e==="avg",S="0.0";if(C||(S="-1.0 / 1e-20"),o){let P=">=";this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec3 strides =
ivec3(${i}, ${p}, ${u});
2022-11-20 22:20:02 +01:00
const ivec3 pads = ivec3(${g}, ${x}, ${b});
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 xCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xDCorner = xCorner.x;
int xRCorner = xCorner.y;
int xCCorner = xCorner.z;
// max/min x(?, ?, ?, ch) to get y(yD, yR, yC, ch).
// ? = to be determined
float minMaxValue = 0.0;
float minMaxValueFound = 0.0;
int minMaxPosition = 0;
2022-11-20 22:20:02 +01:00
for (int wD = 0; wD < ${d};
2022-11-18 17:13:29 +01:00
wD += ${c}) {
int xD = xDCorner + wD;
2023-08-05 15:03:11 +02:00
if (xD < 0 || xD >= ${t.inDepth}) {
2022-11-18 17:13:29 +01:00
continue;
}
2022-11-20 22:20:02 +01:00
for (int wR = 0; wR < ${f};
2022-11-18 17:13:29 +01:00
wR += ${l}) {
int xR = xRCorner + wR;
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
for (int wC = 0; wC < ${h};
wC += ${m}) {
int xC = xCCorner + wC;
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
continue;
}
float value = getX(batch, xD, xR, xC, ch);
// If a min / max value has already been found, use it. If not,
// use the current value.
float currMinMaxValue = mix(
value, minMaxValue, minMaxValueFound);
if (value ${P} currMinMaxValue) {
2022-11-18 17:13:29 +01:00
minMaxValue = value;
minMaxValueFound = 1.0;
2023-08-05 15:03:11 +02:00
minMaxPosition = ${n?s?`(((batch * ${t.inDepth} + xD) * ${t.inHeight} + xR) * ${t.inWidth} + xC) * ${t.inChannels} + ch`:`((xD * ${t.inHeight} + xR) * ${t.inWidth} + xC) * ${t.inChannels} + ch`:`wD * ${f} * ${h} +
2022-11-18 17:13:29 +01:00
wR * ${h} + wC`};
}
}
}
}
setOutput(float(minMaxPosition));
}
2023-08-05 15:03:11 +02:00
`;return}let k="max",_=`${e}(${e}(${e}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;e==="avg"&&(_="avgValue / max(count, 1.0)");let E=Math.floor(a/4)*4,R=a%4,D=`
if (${C}) {
2022-11-18 17:13:29 +01:00
avgValue += dot(values, ones);
} else {
minMaxValue = ${k}(values, minMaxValue);
}
`;this.userCode=`
const ivec3 strides =
ivec3(${i}, ${p}, ${u});
2022-11-20 22:20:02 +01:00
const ivec3 pads = ivec3(${g}, ${x}, ${b});
2023-05-08 15:12:41 +02:00
const float initializationValue = ${S};
2022-11-18 17:13:29 +01:00
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float count = 0.0;
float getValue(int batch, int xD, int xR, int xC, int ch) {
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
return initializationValue;
}
count += 1.0;
return getX(batch, xD, xR, xC, ch);
}
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 xCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xDCorner = xCorner.x;
int xRCorner = xCorner.y;
int xCCorner = xCorner.z;
// max/min x(?, ?, ?, d) to get y(yD, yR, yC, ch).
// ? = to be determined
2023-05-08 15:12:41 +02:00
vec4 minMaxValue = vec4(${S});
2022-11-18 17:13:29 +01:00
float avgValue = 0.0;
count = 0.0;
2022-11-20 22:20:02 +01:00
for (int wD = 0; wD < ${d};
2022-11-18 17:13:29 +01:00
wD += ${c}) {
int xD = xDCorner + wD;
2023-08-05 15:03:11 +02:00
if (xD < 0 || xD >= ${t.inDepth}) {
2022-11-18 17:13:29 +01:00
continue;
}
2022-11-20 22:20:02 +01:00
for (int wR = 0; wR < ${f};
2022-11-18 17:13:29 +01:00
wR += ${l}) {
int xR = xRCorner + wR;
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
2023-01-06 19:23:06 +01:00
for (int wC = 0; wC < ${E}; wC += 4) {
2022-11-18 17:13:29 +01:00
int xC = xCCorner + wC * ${m};
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
getValue(batch, xD, xR, xC + 2 * ${m}, ch),
getValue(batch, xD, xR, xC + 3 * ${m}, ch)
);
2023-05-08 15:12:41 +02:00
${D}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
int xC = xCCorner + ${E};
2023-05-08 15:12:41 +02:00
if (${R===1}) {
2022-11-18 17:13:29 +01:00
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
initializationValue,
initializationValue,
initializationValue
);
2023-05-08 15:12:41 +02:00
${D}
} else if (${R===2}) {
2022-11-18 17:13:29 +01:00
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
initializationValue,
initializationValue
);
2023-05-08 15:12:41 +02:00
${D}
} else if (${R===3}) {
2022-11-18 17:13:29 +01:00
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
getValue(batch, xD, xR, xC + 2 * ${m}, ch),
initializationValue
);
2023-05-08 15:12:41 +02:00
${D}
2022-11-18 17:13:29 +01:00
}
}
}
2023-01-06 19:23:06 +01:00
setOutput(${_});
2022-11-18 17:13:29 +01:00
}
`}};function A9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t;Vs(n,"avgPool");let{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1;y.assert(w.eitherStridesOrDilationsAreOne(a,u),()=>`Error in avgPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${u}'`);let c=w.computePool2DInfo(n.shape,s,a,u,i,p);if(c.filterWidth===1&&c.filterHeight===1&&y.arraysEqual(c.inShape,c.outShape))return Dt({inputs:{x:n},backend:e});let l=new Us(c,"avg",!1);return e.runWebGLProgram(l,[n],"float32")}var hA={kernelName:Qo,backendName:"webgl",kernelFunc:A9};function F9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{filterSize:s,strides:a,pad:i,dimRoundingMode:p,dataFormat:u}=o,c=[1,1,1],l=w.computePool3DInfo(n.shape,s,a,c,i,p,u),m=new xu(l,"avg",!1);return e.runWebGLProgram(m,[n],"float32")}var gA={kernelName:Zs,backendName:"webgl",kernelFunc:F9};var vh=class{constructor(t){this.variableNames=["dy"],this.outputShape=t.inShape;let e=t.filterHeight,o=t.filterWidth,n=t.strideHeight,s=t.strideWidth,a=t.dilationHeight,i=t.dilationWidth,p=t.effectiveFilterHeight,u=t.effectiveFilterWidth,c=p-1-t.padInfo.top,l=u-1-t.padInfo.left,m=1/(e*o);this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${c}, ${l});
const float avgMultiplier = float(${m});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 dyRCCorner = coords.yz - pads;
int dyRCorner = dyRCCorner.x;
int dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${p};
wR += ${a}) {
float dyR = float(dyRCorner + wR) / ${n}.0;
2023-08-05 15:03:11 +02:00
if (dyR < 0.0 || dyR >= ${t.outHeight}.0 || fract(dyR) > 0.0) {
2022-11-18 17:13:29 +01:00
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${u};
wC+= ${i}) {
float dyC = float(dyCCorner + wC) / ${s}.0;
2023-08-05 15:03:11 +02:00
if (dyC < 0.0 || dyC >= ${t.outWidth}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(b, idyR, idyC, d);
dotProd += dyValue * avgMultiplier;
}
}
setOutput(dotProd);
}
`}},kh=class{constructor(t){this.variableNames=["dy"],this.outputShape=t.inShape;let e=t.filterDepth,o=t.filterHeight,n=t.filterWidth,s=t.strideDepth,a=t.strideHeight,i=t.strideWidth,p=t.dilationDepth,u=t.dilationHeight,c=t.dilationWidth,l=t.effectiveFilterDepth,m=t.effectiveFilterHeight,d=t.effectiveFilterWidth,f=l-1-t.padInfo.front,h=m-1-t.padInfo.top,g=d-1-t.padInfo.left,x=1/(e*o*n);this.userCode=`
2022-11-20 22:20:02 +01:00
const ivec3 pads = ivec3(${f}, ${h}, ${g});
const float avgMultiplier = float(${x});
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyDCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
// Convolve dy(?, ?, ?, d) with pos mask(:, :, :, ch) to get
// dx(xD, xR, xC, ch).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wD = 0; wD < ${l};
wD += ${p}) {
float dyD = float(dyDCorner + wD) / ${s}.0;
2023-08-05 15:03:11 +02:00
if (dyD < 0.0 || dyD >= ${t.outDepth}.0 || fract(dyD) > 0.0) {
2022-11-18 17:13:29 +01:00
continue;
}
int idyD = int(dyD);
for (int wR = 0; wR < ${m};
wR += ${u}) {
float dyR = float(dyRCorner + wR) / ${a}.0;
2023-08-05 15:03:11 +02:00
if (dyR < 0.0 || dyR >= ${t.outHeight}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
2022-11-20 22:20:02 +01:00
for (int wC = 0; wC < ${d};
2022-11-18 17:13:29 +01:00
wC += ${c}) {
float dyC = float(dyCCorner + wC) / ${i}.0;
2023-08-05 15:03:11 +02:00
if (dyC < 0.0 || dyC >= ${t.outWidth}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(batch, idyD, idyR, idyC, ch);
dotProd += dyValue * avgMultiplier;
}
}
}
setOutput(dotProd);
}
`}};function P9(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s}=t,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=[1,1,1],m=w.computePool3DInfo(a.shape,i,p,l,u,c),d=new kh(m);return e.runWebGLProgram(d,[n],a.dtype)}var xA={kernelName:Ei,backendName:"webgl",kernelFunc:P9};function O9(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s}=t,a=s;Vs([n,s],"avgPoolGrad");let{filterSize:i,strides:p,pad:u}=o,c=w.computePool2DInfo(a.shape,i,p,1,u),l=new vh(c);return e.runWebGLProgram(l,[n],a.dtype)}var yA={kernelName:$i,backendName:"webgl",kernelFunc:O9};function M9(r){let{inputs:t,backend:e,attrs:o}=r,{a:n,b:s}=t,{transposeA:a,transposeB:i}=o;return Cp({a:n,b:s,transposeA:a,transposeB:i,backend:e})}var bA={kernelName:Zo,backendName:"webgl",kernelFunc:M9};var Nh=class{constructor(t,e,o,n,s,a){this.outputShape=[],this.variableNames=["x","mean","variance"],w.assertAndGetBroadcastShape(t,e),w.assertAndGetBroadcastShape(t,o);let i="0.0";n!=null&&(w.assertAndGetBroadcastShape(t,n),this.variableNames.push("offset"),i="getOffsetAtOutCoords()");let p="1.0";s!=null&&(w.assertAndGetBroadcastShape(t,s),this.variableNames.push("scale"),p="getScaleAtOutCoords()"),this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
float x = getXAtOutCoords();
float mean = getMeanAtOutCoords();
float variance = getVarianceAtOutCoords();
float offset = ${i};
float scale = ${p};
float inv = scale * inversesqrt(variance + float(${a}));
setOutput(dot(vec3(x, -mean, offset), vec3(inv, inv, 1)));
}
`}};var Th=class{constructor(t,e,o,n,s,a){this.packedInputs=!0,this.packedOutput=!0,this.variableNames=["x","mean","variance"],w.assertAndGetBroadcastShape(t,e),w.assertAndGetBroadcastShape(t,o);let i="vec4(0.0)";n!=null&&(w.assertAndGetBroadcastShape(t,n),this.variableNames.push("offset"),i="getOffsetAtOutCoords()");let p="vec4(1.0)";s!=null&&(w.assertAndGetBroadcastShape(t,s),this.variableNames.push("scale"),p="getScaleAtOutCoords()"),this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
vec4 offset = ${i};
vec4 scale = ${p};
vec4 x = getXAtOutCoords();
vec4 mean = getMeanAtOutCoords();
vec4 variance = getVarianceAtOutCoords();
vec4 inv = scale * inversesqrt(variance + vec4(${a}));
setOutput((x - mean) * inv + offset);
}
`}};var L9=({inputs:r,backend:t,attrs:e})=>{let{x:o,mean:n,variance:s,offset:a,scale:i}=r;y.assert(n.shape.length===s.shape.length,()=>"Batch normalization gradient requires mean and variance to have equal ranks."),y.assert(a==null||n.shape.length===a.shape.length,()=>"Batch normalization gradient requires mean and offset to have equal ranks."),y.assert(i==null||n.shape.length===i.shape.length,()=>"Batch normalization gradient requires mean and scale to have equal ranks.");let{varianceEpsilon:p}=e;p==null&&(p=.001);let u=[o,n,s],c=null;a!=null&&(c=a.shape,u.push(a));let l=null;i!=null&&(l=i.shape,u.push(i));let m=A().getBool("WEBGL_PACK_NORMALIZATION")?new Th(o.shape,n.shape,s.shape,c,l,p):new Nh(o.shape,n.shape,s.shape,c,l,p);return t.runWebGLProgram(m,u,u[0].dtype)},CA={kernelName:In,backendName:"webgl",kernelFunc:L9};var _h=class{constructor(t){this.variableNames=["source"],this.outputShape=t,this.rank=t.length;let e=Re(this.rank);this.customUniforms=[{name:"start",arrayIndex:this.rank,type:"int"}];let o=B9(this.rank),n,s=t.map((a,i)=>`sourceLoc.${Cv[i]} = start[${i}] + coords.${Cv[i]};`);n=`
2023-08-05 15:03:11 +02:00
${e} sourceLoc;
${e} coords = getOutputCoords();
2022-11-18 17:13:29 +01:00
${s.join(`
`)}
`,this.userCode=`
void main() {
${n}
setOutput(getSource(${o}));
}
`}},Cv=["x","y","z","w","u","v"];function B9(r){if(r===1)return"sourceLoc";if(r<=6)return Cv.slice(0,r).map(t=>"sourceLoc."+t).join(",");throw Error(`Slicing for rank ${r} is not yet supported`)}var $h=class{constructor(t){this.variableNames=["source"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=t,this.rank=t.length,this.customUniforms=[{name:"start",arrayIndex:this.rank,type:"int"}];let e=Re(this.rank),o=Rt("coords",this.rank),n=Rt("sourceLoc",this.rank),s=this.rank===1?"sourceLoc":`vec2(${n.slice(-2).join()})`,a=`getChannel(getSource(${n.join()}), ${s})`,i=`
2022-11-18 17:13:29 +01:00
result.x = ${a};
2023-08-05 15:03:11 +02:00
if (++${o[this.rank-1]} < ${t[this.rank-1]}) {
2022-11-18 17:13:29 +01:00
++${n[this.rank-1]};
result.y = ${a};
--${n[this.rank-1]};
}
`,p=this.rank===1?"":`
--${o[this.rank-1]};
2023-08-05 15:03:11 +02:00
if (++${o[this.rank-2]} < ${t[this.rank-2]}) {
2022-11-18 17:13:29 +01:00
++${n[this.rank-2]};
result.z = ${a};
2023-08-05 15:03:11 +02:00
if (++${o[this.rank-1]} < ${t[this.rank-1]}) {
2022-11-18 17:13:29 +01:00
++${n[this.rank-1]};
result.w = ${a};
}
}
`,u=this.rank<=4?`sourceLoc = coords +
2023-08-05 15:03:11 +02:00
${e}(${t.map((c,l)=>`start[${l}]`).join()});`:t.map((c,l)=>`${n[l]} = ${o[l]} + start[${l}];`).join(`
2022-11-18 17:13:29 +01:00
`);this.userCode=`
void main() {
2023-08-05 15:03:11 +02:00
${e} coords = getOutputCoords();
${e} sourceLoc;
2022-11-18 17:13:29 +01:00
${u}
vec4 result = vec4(0.);
${i}
${p}
setOutput(result);
}
`}};function z9(r,t,e,o){let n=o.texData.get(r.dataId),s=o.makeTensorInfo(e,r.dtype),a=o.texData.get(s.dataId);Object.assign(a,n),a.refCount=1,a.shape=e,a.dtype=r.dtype;let i=pt.computeFlatOffset(t,y.computeStrides(r.shape));n.slice&&(i+=n.slice.flatOffset),a.slice={flatOffset:i,origDataId:n.slice&&n.slice.origDataId||r.dataId};let p=o.dataRefCount.get(a.slice.origDataId)||1;return o.dataRefCount.set(a.slice.origDataId,p+1),s}function Gs(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{begin:s,size:a}=o,[i,p]=pt.parseSliceParams(n,s,a);if(pt.assertParamsValid(n,i,p),y.sizeFromShape(p)===0)return e.makeTensorInfo(p,n.dtype,[]);if(e.shouldExecuteOnCPU([n])||n.dtype==="string"){let l=e.texData.get(n.dataId),m=cD(l.values,i,p,n.shape,n.dtype);return e.makeTensorInfo(p,n.dtype,m)}let{isPacked:u}=e.texData.get(n.dataId),c=pt.isSliceContinous(n.shape,i,p);if(u||!c){let l=A().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new $h(p):new _h(p),m=[i];return e.runWebGLProgram(l,[n],n.dtype,m)}return e.uploadToGPU(n.dataId),z9(n,i,p,e)}var wA={kernelName:ha,backendName:"webgl",kernelFunc:Gs};var V9=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{blockShape:s,crops:a}=o;y.assert(n.shape.length<=4,()=>"batchToSpaceND for rank > 4 with a WebGL backend not implemented yet");let i=s.reduce((b,C)=>b*C),p=w.getReshaped(n.shape,s,i),u=w.getPermuted(p.length,s.length),c=w.getReshapedPermuted(n.shape,s,i),l=w.getSliceBeginCoords(a,s.length),m=w.getSliceSize(c,a,s.length),d=[],f=te({inputs:{x:n},backend:e,attrs:{shape:p}}),h=bt({inputs:{x:f},backend:e,attrs:{perm:u}}),g=te({inputs:{x:h},backend:e,attrs:{shape:c}}),x=Gs({inputs:{x:g},backend:e,attrs:{begin:l,size:m}});return d.push(f),d.push(h),d.push(g),d.forEach(b=>e.disposeIntermediateTensorInfo(b)),x},SA={kernelName:Js,backendName:"webgl",kernelFunc:V9};function W9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,weights:s}=t,{size:a}=o,i=e.readSync(n.dataId),p=e.readSync(s.dataId),u=uh(i,p,s.dtype,s.shape,a);return e.makeTensorInfo([a],s.dtype,u)}var 
IA={kernelName:Jo,backendName:"webgl",kernelFunc:W9};var U9=`
int r = int(a.r) & int(b.r);
int g = int(a.g) & int(b.g);
int rb = int(a.b) & int(b.b);
int ra = int(a.a) & int(b.a);
return vec4(r, g, rb, ra);
2023-08-05 15:03:11 +02:00
`,G9=`
return float(int(a.r) & int(b.r));
`;function H9(r){let{inputs:t,backend:e}=r,{a:o,b:n}=t,s=A().getBool("WEBGL_PACK_BINARY_OPERATIONS"),a=A().getNumber("WEBGL_VERSION");if(e.shouldExecuteOnCPU([o,n])||a===1){let p=e.texData.get(o.dataId).values,u=e.texData.get(n.dataId).values,[c,l]=FR(o.shape,n.shape,p,u,o.dtype),m=e.makeTensorInfo(l,o.dtype),d=e.texData.get(m.dataId);return d.values=c,m}let i;return s?i=new qr(U9,o.shape,n.shape,!1):i=new Fr(G9,o.shape,n.shape),e.runWebGLProgram(i,[o,n],o.dtype)}var vA={kernelName:qa,backendName:"webgl",kernelFunc:H9};function K9(r){let{inputs:t,backend:e}=r,{s0:o,s1:n}=t,s=e.readSync(o.dataId),a=e.readSync(n.dataId),i=w.assertAndGetBroadcastShape(Array.from(s),Array.from(a));return e.makeTensorInfo([i.length],"int32",Int32Array.from(i))}var kA={kernelName:ea,backendName:"webgl",kernelFunc:K9};var q9="return float(a != b);",wv=nt({opSnippet:q9,cpuKernelImpl:tD,dtype:"bool"}),NA={kernelName:Yn,backendName:"webgl",kernelFunc:wv};function yi(r){let{inputs:t,backend:e}=r,{input:o}=t,n=e.texData.get(o.dataId);return Dt({inputs:{x:n.complexTensorInfos.real},backend:e})}var TA={kernelName:Gi,backendName:"webgl",kernelFunc:yi};var j9="return float(int(x));";function _A(r,t){let e=new tr(r.shape,j9),o=t.runWebGLProgram(e,[r],"int32");return{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}function Sv(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{dtype:s}=o;if(s==="complex64"){if(n.dtype==="complex64")return Dt({inputs:{x:n},backend:e});let a=Ur(n.shape),i=Sv({inputs:{x:n},backend:e,attrs:{dtype:"float32"}}),p=Pr({inputs:{real:i,imag:a},backend:e});return a.dispose(),e.disposeIntermediateTensorInfo(i),p}if(n.dtype==="complex64"){let a=yi({inputs:{input:n},backend:e}),i=Sv({inputs:{x:a},backend:e,attrs:{dtype:s}});return e.disposeIntermediateTensorInfo(a),i}if(!y.hasEncodingLoss(n.dtype,s)){let a=Dt({inputs:{x:n},backend:e});return{dataId:a.dataId,shape:a.shape,dtype:s}}if(e.shouldExecuteOnCPU([n])){let a=e.texData.get(n.dataId).values,[i,p,u]=PR(a,n.shape,n.dtype,s);return 
e.makeTensorInfo(i,p,u)}if(s==="int32")return _A(n,e);if(s==="bool"){let a=e.makeTensorInfo([],"bool",y.getTypedArrayFromDType("bool",1)),p=wv({inputs:{a:n,b:a},backend:e});return e.disposeIntermediateTensorInfo(a),p}throw new Error(`Error in Cast: failed to cast ${n.dtype} to ${s}`)}var $A={kernelName:yo,backendName:"webgl",kernelFunc:Sv};var EA="return ceil(x);",X9=xe({opSnippet:EA,packedOpSnippet:EA,cpuKernelImpl:OR}),RA={kernelName:en,backendName:"webgl",kernelFunc:X9};var Eh=class{constructor(t){this.variableNames=["A"],this.customUniforms=[{name:"minVal",type:"float"},{name:"maxVal",type:"float"}],this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
float value = getAAtOutCoords();
if (isnan(value)) {
setOutput(value);
return;
}
setOutput(clamp(value, minVal, maxVal));
}
`}};var Rh=class{constructor(t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"minVal",type:"float"},{name:"maxVal",type:"float"}],this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
vec4 value = getAAtOutCoords();
if (any(isnan(value))) {
setOutput(value);
return;
}
setOutput(clamp(value, vec4(minVal), vec4(maxVal)));
}
`}};function Y9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{clipValueMin:s,clipValueMax:a}=o,i;A().getBool("WEBGL_PACK_CLIP")?i=new Rh(n.shape):i=new Eh(n.shape);let p=[[s],[a]];return e.runWebGLProgram(i,[n],n.dtype,p)}var DA={kernelName:bo,backendName:"webgl",kernelFunc:Y9};var Dh=class{constructor(t){this.variableNames=["real","imag"],this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
float re = abs(getRealAtOutCoords());
float im = abs(getImagAtOutCoords());
float mx = max(re, im);
// sadly the length function in glsl is not underflow-safe
// (at least not on Intel GPUs). So the safe solution is
// to ensure underflow-safety in all cases.
setOutput(
mx == 0.0 ? 0.0 : mx * length(vec2(1, min(re, im)/mx))
);
}
`}};function AA(r,t){return{dataId:t.dataId,dtype:t.dtype,shape:r.shape}}function Q9(r){let{inputs:t,backend:e}=r,{x:o}=t,n=e.texData.get(o.dataId),s=new Dh(o.shape),a=[AA(o,n.complexTensorInfos.real),AA(o,n.complexTensorInfos.imag)];return e.runWebGLProgram(s,a,a[0].dtype)}var FA={kernelName:Di,backendName:"webgl",kernelFunc:Q9};var Ah=class{constructor(t){this.outputShape=[],this.outputShape=w.computeOutShape(t,1),this.variableNames=t.map((a,i)=>`T${i}`);let e=new Array(t.length-1);e[0]=t[0][1];for(let a=1;a<e.length;a++)e[a]=e[a-1]+t[a][1];let o=[`if (yC < ${e[0]}) setOutput(getT0(yR, yC));`];for(let a=1;a<e.length;a++){let i=e[a-1];o.push(`else if (yC < ${e[a]}) setOutput(getT${a}(yR, yC-${i}));`)}let n=e.length,s=e[e.length-1];o.push(`else setOutput(getT${n}(yR, yC-${s}));`),this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
int yR = coords.x;
int yC = coords.y;
${o.join(`
`)}
}
`}};var Ph=class{constructor(t,e){this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[],this.outputShape=w.computeOutShape(t,e);let o=this.outputShape,n=o.length,s=Re(n),a=Rt("coords",n),i=["x","y","z","w","u","v"].slice(0,n);this.variableNames=t.map((h,g)=>`T${g}`);let p=new Array(t.length-1);p[0]=t[0][e];for(let h=1;h<p.length;h++)p[h]=p[h-1]+t[h][e];let u=i[e],c=i.slice(-2),l=i.join(),m=`if (${u} < ${p[0]}) {
2022-11-18 17:13:29 +01:00
return getChannel(
getT0(${l}), vec2(${c.join()}));
}`;for(let h=1;h<p.length;h++){let g=p[h-1];m+=`
if (${u} < ${p[h]} && ${u} >= ${p[h-1]}) {
return getChannel(
getT${h}(${Fh(i,u,g)}),
vec2(${Fh(c,u,g)}));
2022-11-20 22:20:02 +01:00
}`}let d=p.length,f=p[p.length-1];m+=`
2022-11-18 17:13:29 +01:00
return getChannel(
getT${d}(${Fh(i,u,f)}),
vec2(${Fh(c,u,f)}));`,this.userCode=`
2022-11-18 17:13:29 +01:00
float getValue(${i.map(h=>"int "+h)}) {
${m}
}
void main() {
${s} coords = getOutputCoords();
vec4 result = vec4(getValue(${a}), 0., 0., 0.);
${a[n-1]} = ${a[n-1]} + 1;
if (${a[n-1]} < ${o[n-1]}) {
result.g = getValue(${a});
}
${a[n-2]} = ${a[n-2]} + 1;
if (${a[n-2]} < ${o[n-2]}) {
result.a = getValue(${a});
}
${a[n-1]} = ${a[n-1]} - 1;
if (${a[n-2]} < ${o[n-2]} &&
${a[n-1]} < ${o[n-1]}) {
result.b = getValue(${a});
}
setOutput(result);
}
`}};function Fh(r,t,e){let o=r.indexOf(t);return r.map((s,a)=>a===o?`${s} - ${e}`:s).join()}function wp(r){let{inputs:t,backend:e}=r,{input:o}=t,n=e.texData.get(o.dataId);return Dt({inputs:{x:n.complexTensorInfos.imag},backend:e})}var PA={kernelName:Vi,backendName:"webgl",kernelFunc:wp};function Bc(r,t,e){let o=r[0].dtype;if(o==="complex64"){let d=r.map(b=>yi({inputs:{input:b},backend:e})),f=r.map(b=>wp({inputs:{input:b},backend:e})),h=Bc(d,t,e),g=Bc(f,t,e),x=Pr({inputs:{real:h,imag:g},backend:e});return d.forEach(b=>e.disposeIntermediateTensorInfo(b)),f.forEach(b=>e.disposeIntermediateTensorInfo(b)),e.disposeIntermediateTensorInfo(h),e.disposeIntermediateTensorInfo(g),x}let n=e.shouldExecuteOnCPU(r);if(o==="string"&&(n=!0),n){let d=r.map(S=>{let _=[-1,y.sizeFromShape(S.shape.slice(t))];return te({inputs:{x:S},backend:e,attrs:{shape:_}})}),f=d.map(S=>({vals:e.readSync(S.dataId),shape:S.shape})),h=w.computeOutShape(d.map(S=>S.shape),1),g=d[0].shape[0]===1,x=MR(f,h,o,g),b=w.computeOutShape(r.map(S=>S.shape),t),C=e.makeTensorInfo(b,o,x);return d.forEach(S=>e.disposeIntermediateTensorInfo(S)),C}let s=r.filter(d=>y.sizeFromShape(d.shape)>0),a=A().getBool("WEBGL_PACK_ARRAY_OPERATIONS")&&s[0].shape.length>1;if(s.length===1){let d=a?new tr(r[0].shape,La):new Ar(r[0].shape,La);return e.runWebGLProgram(d,r,o)}let i=A().getNumber("WEBGL_MAX_TEXTURES_IN_SHADER");if(s.length>i){let d=[];for(let h=0;h<s.length;h+=i){let g=s.slice(h,h+i);d.push(Bc(g,t,e))}let f=Bc(d,t,e);for(let h of d)e.disposeIntermediateTensorInfo(h);return f}if(a){let d=new Ph(s.map(f=>f.shape),t);return e.runWebGLProgram(d,s,o)}let{tensors2D:p,outShape:u}=Z9(s,t,e),c=new Ah(p.map(d=>d.shape)),l=e.runWebGLProgram(c,p,o);p.forEach(d=>e.disposeIntermediateTensorInfo(d));let m=te({inputs:{x:l},attrs:{shape:u},backend:e});return e.disposeIntermediateTensorInfo(l),m}function Z9(r,t,e){let 
o=w.computeOutShape(r.map(s=>s.shape),t);return{tensors2D:r.map(s=>te({inputs:{x:s},attrs:{shape:[-1,y.sizeFromShape(s.shape.slice(t))]},backend:e})),outShape:o}}function Iv(r){let{inputs:t,backend:e,attrs:o}=r,{axis:n}=o,s=y.parseAxisParam(n,t[0].shape)[0],a=t.map(u=>u.shape);w.assertParamsConsistent(a,s);let i=w.computeOutShape(t.map(u=>u.shape),s);if(y.sizeFromShape(i)===0)return e.makeTensorInfo(i,t[0].dtype,[]);let p=t.filter(u=>y.sizeFromShape(u.shape)>0);return p.length===1?Dt({inputs:{x:p[0]},backend:e}):Bc(p,s,e)}var OA={kernelName:ta,backendName:"webgl",kernelFunc:Iv};var zc=class{constructor(t,e=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.outputShape=t.outShape;let a=t.padInfo.top,i=t.padInfo.left,p=t.strideHeight,u=t.strideWidth,c=t.dilationHeight,l=t.dilationWidth,m=t.filterHeight,d=t.filterWidth,f=Math.floor(t.inChannels/4)*4,h=t.inChannels%4,g=t.dataFormat==="channelsLast",x=g?1:2,b=g?2:3,C=g?3:1,S="",k="";o&&(n?S=`float activation(float a) {
2022-11-18 17:13:29 +01:00
float b = getPreluActivationWeightsAtOutCoords();
${o}
2023-05-08 15:12:41 +02:00
}`:s?S=`float activation(float a) {
2022-11-18 17:13:29 +01:00
float b = getLeakyreluAlphaAtOutCoords();
${o}
2023-05-08 15:12:41 +02:00
}`:S=`
2022-11-18 17:13:29 +01:00
float activation(float x) {
${o}
}
2023-08-05 15:03:11 +02:00
`,k="result = activation(result);");let _=e?"result += getBiasAtOutCoords();":"";e&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
2023-05-08 15:12:41 +02:00
${S}
2022-11-18 17:13:29 +01:00
const ivec2 strides = ivec2(${p}, ${u});
const ivec2 pads = ivec2(${a}, ${i});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d2 = coords[${C}];
2022-11-18 17:13:29 +01:00
ivec2 xRCCorner =
2022-11-20 22:20:02 +01:00
ivec2(coords[${x}], coords[${b}]) * strides - pads;
2022-11-18 17:13:29 +01:00
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${m}; wR++) {
int xR = xRCorner + wR * ${c};
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
2022-11-20 22:20:02 +01:00
for (int wC = 0; wC < ${d}; wC++) {
2022-11-18 17:13:29 +01:00
int xC = xCCorner + wC * ${l};
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
continue;
}
2022-11-20 22:20:02 +01:00
for (int d1 = 0; d1 < ${f}; d1 += 4) {
2022-11-18 17:13:29 +01:00
vec4 wValues = vec4(
getW(wR, wC, d1, d2),
getW(wR, wC, d1 + 1, d2),
getW(wR, wC, d1 + 2, d2),
getW(wR, wC, d1 + 3, d2)
);
if (${g}) {
vec4 xValues = vec4(
getX(batch, xR, xC, d1),
getX(batch, xR, xC, d1 + 1),
getX(batch, xR, xC, d1 + 2),
getX(batch, xR, xC, d1 + 3)
);
dotProd += dot(xValues, wValues);
} else {
vec4 xValues = vec4(
getX(batch, d1, xR, xC),
getX(batch, d1 + 1, xR, xC),
getX(batch, d1 + 2, xR, xC),
getX(batch, d1 + 3, xR, xC)
);
dotProd += dot(xValues, wValues);
}
}
if (${h===1}) {
if (${g}) {
dotProd +=
2022-11-20 22:20:02 +01:00
getX(batch, xR, xC, ${f}) *
getW(wR, wC, ${f}, d2);
2022-11-18 17:13:29 +01:00
} else {
dotProd +=
2022-11-20 22:20:02 +01:00
getX(batch, ${f}, xR, xC) *
getW(wR, wC, ${f}, d2);
2022-11-18 17:13:29 +01:00
}
} else if (${h===2}) {
vec2 wValues = vec2(
2022-11-20 22:20:02 +01:00
getW(wR, wC, ${f}, d2),
getW(wR, wC, ${f} + 1, d2)
2022-11-18 17:13:29 +01:00
);
if (${g}) {
vec2 xValues = vec2(
2022-11-20 22:20:02 +01:00
getX(batch, xR, xC, ${f}),
getX(batch, xR, xC, ${f} + 1)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
} else {
vec2 xValues = vec2(
2022-11-20 22:20:02 +01:00
getX(batch, ${f}, xR, xC),
getX(batch, ${f} + 1, xR, xC)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
}
} else if (${h===3}) {
vec3 wValues = vec3(
2022-11-20 22:20:02 +01:00
getW(wR, wC, ${f}, d2),
getW(wR, wC, ${f} + 1, d2),
getW(wR, wC, ${f} + 2, d2)
2022-11-18 17:13:29 +01:00
);
if (${g}) {
vec3 xValues = vec3(
2022-11-20 22:20:02 +01:00
getX(batch, xR, xC, ${f}),
getX(batch, xR, xC, ${f} + 1),
getX(batch, xR, xC, ${f} + 2)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
} else {
vec3 xValues = vec3(
2022-11-20 22:20:02 +01:00
getX(batch, ${f}, xR, xC),
getX(batch, ${f} + 1, xR, xC),
getX(batch, ${f} + 2, xR, xC)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
}
}
}
}
float result = dotProd;
${_}
${k}
setOutput(result);
}
`}},Oh=class{constructor(t){this.variableNames=["x","W"],this.outputShape=t.outShape;let e=t.padInfo.front,o=t.padInfo.top,n=t.padInfo.left,s=t.strideDepth,a=t.strideHeight,i=t.strideWidth,p=t.dilationDepth,u=t.dilationHeight,c=t.dilationWidth,l=t.filterDepth,m=t.filterHeight,d=t.filterWidth,f=Math.floor(t.inChannels/4)*4,h=t.inChannels%4;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec3 strides = ivec3(${s}, ${a}, ${i});
2023-08-05 15:03:11 +02:00
const ivec3 pads = ivec3(${e}, ${o}, ${n});
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int d2 = coords.u;
ivec3 xFRCCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xFCorner = xFRCCorner.x;
int xRCorner = xFRCCorner.y;
int xCCorner = xFRCCorner.z;
// Convolve x(?, ?, ?, d1) with w(:, :, :, d1, d2) to get
// y(yF, yR, yC, d2). ? = to be determined. : = across all
// values in that axis.
float dotProd = 0.0;
for (int wF = 0; wF < ${l}; wF++) {
int xF = xFCorner + wF * ${p};
2023-08-05 15:03:11 +02:00
if (xF < 0 || xF >= ${t.inDepth}) {
2022-11-18 17:13:29 +01:00
continue;
}
for (int wR = 0; wR < ${m}; wR++) {
int xR = xRCorner + wR * ${u};
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
2022-11-20 22:20:02 +01:00
for (int wC = 0; wC < ${d}; wC++) {
2022-11-18 17:13:29 +01:00
int xC = xCCorner + wC * ${c};
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
continue;
}
2022-11-20 22:20:02 +01:00
for (int d1 = 0; d1 < ${f}; d1 += 4) {
2022-11-18 17:13:29 +01:00
vec4 xValues = vec4(
getX(batch, xF, xR, xC, d1),
getX(batch, xF, xR, xC, d1 + 1),
getX(batch, xF, xR, xC, d1 + 2),
getX(batch, xF, xR, xC, d1 + 3)
);
vec4 wValues = vec4(
getW(wF, wR, wC, d1, d2),
getW(wF, wR, wC, d1 + 1, d2),
getW(wF, wR, wC, d1 + 2, d2),
getW(wF, wR, wC, d1 + 3, d2)
);
dotProd += dot(xValues, wValues);
}
if (${h===1}) {
dotProd +=
2022-11-20 22:20:02 +01:00
getX(batch, xF, xR, xC, ${f}) *
getW(wF, wR, wC, ${f}, d2);
2022-11-18 17:13:29 +01:00
} else if (${h===2}) {
vec2 xValues = vec2(
2022-11-20 22:20:02 +01:00
getX(batch, xF, xR, xC, ${f}),
getX(batch, xF, xR, xC, ${f} + 1)
2022-11-18 17:13:29 +01:00
);
vec2 wValues = vec2(
2022-11-20 22:20:02 +01:00
getW(wF, wR, wC, ${f}, d2),
getW(wF, wR, wC, ${f} + 1, d2)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
} else if (${h===3}) {
vec3 xValues = vec3(
2022-11-20 22:20:02 +01:00
getX(batch, xF, xR, xC, ${f}),
getX(batch, xF, xR, xC, ${f} + 1),
getX(batch, xF, xR, xC, ${f} + 2)
2022-11-18 17:13:29 +01:00
);
vec3 wValues = vec3(
2022-11-20 22:20:02 +01:00
getW(wF, wR, wC, ${f}, d2),
getW(wF, wR, wC, ${f} + 1, d2),
getW(wF, wR, wC, ${f} + 2, d2)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
}
}
}
}
setOutput(dotProd);
}
`}};var Vc=class{constructor(t,e=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=t.outShape,this.enableShapeUniforms=ut(this.outputShape.length);let a=t.padInfo.left,i=t.strideWidth,p=t.dilationWidth,u=t.filterHeight,c=t.filterWidth,l=c,m=`
2022-11-18 17:13:29 +01:00
int xR; int xC; int xCOffset;
vec4 wTexel; vec4 previous; vec4 final;`;for(let g=0;g<c;g++)m+=`
vec4 xTexelC${g*2};
int xTexelC${g*2}Ready;
vec4 xTexelC${g*2+1};
int xTexelC${g*2+1}Ready;
vec4 xC${g};`;m+=`
for (int r = 0; r < ${u}; r++) {
2023-08-05 15:03:11 +02:00
for (int d1 = 0; d1 < ${t.inChannels}; d1 += 2) {
2022-11-18 17:13:29 +01:00
`;for(let g=0;g<c;g++)m+=`
xTexelC${g*2} = vec4(0.0);
xTexelC${g*2}Ready = 0;
xTexelC${g*2+1} = vec4(0.0);
xTexelC${g*2+1}Ready = 0;
xC${g} = vec4(0.0);`;m+=`
xR = xRCorner + r * dilations[0];
if (xR >=0 && xR < inDims[0]) {
2022-11-20 22:20:02 +01:00
`;for(let g=0;g<(l+1)/2;g++){let x=g*2;if(m+=`
xC = xCCorner + ${x*p};
`,i===1){if(x<c&&(a%2===1?(m+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1;
2022-11-20 22:20:02 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`,p===1&&x>0?m+=`
xC${x} = vec4(xTexelC${x-2}.zw, xTexelC${x}.xy);
2022-11-18 17:13:29 +01:00
`:m+=`
xCOffset = xC + 1 - 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
previous.zw = vec2(0.0);
}
2022-11-20 22:20:02 +01:00
xC${x} = vec4(previous.zw, xTexelC${x}.xy);
2022-11-18 17:13:29 +01:00
} else {
2022-11-20 22:20:02 +01:00
xC${x} = vec4(0.0, 0.0, xTexelC${x}.xy);
2022-11-18 17:13:29 +01:00
}
`):m+=`
2022-11-20 22:20:02 +01:00
if (xC >= 0 && xC < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xC, d1);
2022-11-18 17:13:29 +01:00
if (xC + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x} = xTexelC${x};
`,x+1<c)){let b=a%2===0?y.nearestLargerEven(p):p;p%2===0&&a%2===1||p%2!==0&&a%2!==1?(m+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + imod(pads[1], 2) + ${b};
2022-11-20 22:20:02 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
`,p>1?m+=`
xCOffset -= 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(previous.zw, xTexelC${x+1}.xy);
2022-11-18 17:13:29 +01:00
} else {
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(0.0, 0.0, xTexelC${x+1}.xy);
2022-11-18 17:13:29 +01:00
}
`:m+=`
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(xTexelC${x}.zw, xTexelC${x+1}.xy);
2022-11-18 17:13:29 +01:00
`):b===1?m+=`
2022-11-20 22:20:02 +01:00
xC${x+1} = xTexelC${x};
2022-11-18 17:13:29 +01:00
`:m+=`
xCOffset = xC + ${b};
2022-11-20 22:20:02 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x+1} = xTexelC${x+1};
`}}else x<c&&(a%2===1?(m+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1 - strides[1];
2022-11-20 22:20:02 +01:00
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xC + 1, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xC + 2 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x} = vec4(xTexelC${x}.zw, xTexelC${x+1}.zw);
`,x+1<c&&(m+=`
2022-11-18 17:13:29 +01:00
final = vec4(0.0);
xCOffset = xC + 1 + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1]) {
final = getX(batch, xR, xCOffset, d1);
}
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(xTexelC${x+1}.xy, final.xy);
2022-11-18 17:13:29 +01:00
`)):(m+=`
2022-11-20 22:20:02 +01:00
if(xC >= 0 && xC < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xC, d1);
2022-11-18 17:13:29 +01:00
if (xC + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
xCOffset = xC + strides[1];
2022-11-20 22:20:02 +01:00
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x} = vec4(
xTexelC${x}.xy, xTexelC${x+1}.xy);
`,x+1<c&&(m+=`
xC${x+1} = vec4(xTexelC${x}.zw, xTexelC${x+1}.zw);
`)));x<c&&(m+=`
wTexel = getW(r, ${x}, d1, d2);
dotProd += xC${x}.xxzz * vec4(wTexel.xy, wTexel.xy);
2023-08-05 15:03:11 +02:00
if(d1 + 1 < ${t.inChannels}) {
2022-11-20 22:20:02 +01:00
dotProd += xC${x}.yyww * vec4(wTexel.zw, wTexel.zw);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`,x+1<c&&(m+=`
wTexel = getW(r, ${x+1}, d1, d2);
dotProd += xC${x+1}.xxzz * vec4(wTexel.xy, wTexel.xy);
2023-08-05 15:03:11 +02:00
if(d1 + 1 < ${t.inChannels}) {
2022-11-20 22:20:02 +01:00
dotProd += xC${x+1}.yyww * vec4(wTexel.zw, wTexel.zw);
2022-11-18 17:13:29 +01:00
}
`))}m+=`
}
`,m+=`
}
`,m+=`
}
2022-11-20 22:20:02 +01:00
`;let d="",f="";o&&(n?d=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getPreluActivationWeightsAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:s?d=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getLeakyreluAlphaAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:d=`vec4 activation(vec4 x) {
2022-11-18 17:13:29 +01:00
${o}
2023-08-05 15:03:11 +02:00
}`,f="result = activation(result);");let h=e?"result += getBiasAtOutCoords();":"";e&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
2022-11-20 22:20:02 +01:00
${d}
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
//intialize dotProd with a small epsilon seems to reduce GPU accuracy loss.
vec4 dotProd = vec4(0.000000000000001);
${m}
vec4 result = dotProd - vec4(0.000000000000001);
${h}
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
setOutput(result);
}
`}};var Mh=class{constructor(t,e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"inputShape",type:"ivec4"},{name:"pad",type:"ivec2"},{name:"stride",type:"ivec2"},{name:"dilation",type:"ivec2"},{name:"inChannels",type:"int"},{name:"itemsPerBlockRow",type:"int"},{name:"outWidth",type:"int"}],this.outputShape=t,this.enableShapeUniforms=ut(this.outputShape.length);let{dataFormat:o}=e,n=It(),s=o==="channelsLast",a=s?1:2,i=s?2:3,p=this.enableShapeUniforms?"if(blockIndex < outShape[2] && pos < outShape[1]) {":`if(blockIndex < ${t[2]} && pos < ${t[1]}) {`,u="";for(let c=0;c<=1;c++)for(let l=0;l<=1;l++)u+=`
2022-11-18 17:13:29 +01:00
blockIndex = rc.z + ${l};
pos = rc.y + ${c};
${p}
offsetY = int(blockIndex / outWidth) * stride[0] - pad[0];
d0 = offsetY + dilation[0] * (pos / itemsPerBlockRow);
if(d0 < inputShape[${a}] && d0 >= 0) {
// Use custom imod instead mod. On Intel GPU, mod may generate
// unexpected value.
// https://github.com/tensorflow/tfjs/issues/5447
offsetX = imod(blockIndex, outWidth) * stride[1] - pad[1];
d1 = offsetX + dilation[1] * (imod(pos, itemsPerBlockRow) /
inChannels);
if(d1 < inputShape[${i}] && d1 >= 0) {
ch = imod(pos, inChannels);
if (${s}) {
innerDims = vec2(d1, ch);
result[${c*2+l}] = getChannel(
getA(rc.x, d0, int(innerDims.x),
int(innerDims.y)), innerDims);
} else {
innerDims = vec2(d0, d1);
result[${c*2+l}] = getChannel(
getA(rc.x, ch, int(innerDims.x),
int(innerDims.y)), innerDims);
}
}
}
}
`;this.userCode=`
void main() {
ivec3 rc = getOutputCoords();
vec4 result = vec4(0);
int blockIndex, pos, offsetY, d0, offsetX, d1, ch;
vec2 innerDims;
${u}
${n.output} = result;
}
`}};function Lh(r,t){let e=r.length;return e>=3?t?[...r.slice(0,-3),r[e-3]*r[e-2],r[e-1]]:[...r.slice(0,-3),r[e-3],r[e-2]*r[e-1]]:!t&&e===1&&r[0]>1?[r[0],1]:null}function Bh({x:r,filter:t,convInfo:e,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=r.shape,u=o.texData.get(r.dataId),c=e.inChannels,l=p[0]*p[1]*p[2],m=e.outChannels,d=e.dataFormat==="channelsLast",f=!1,h=!1,g,x=[];if(s!=null){let S=Lh(s.shape,d);S!=null&&(s=te({inputs:{x:s},backend:o,attrs:{shape:S}}),x.push(s))}if(n!=null){let S=Lh(n.shape,d);S!=null&&(n=te({inputs:{x:n},backend:o,attrs:{shape:S}}),x.push(n))}if(!((l===1||m===1)&&c>bv)&&u.isPacked&&d&&u.texture!=null&&p[2]%2!==0&&y.arraysEqual(u.shape.slice(-3),p.slice(-3))){let S=p[0]*p[1]*(p[2]+1),k={dataId:r.dataId,shape:[1,S,e.inChannels],dtype:r.dtype},_=u.shape;u.shape=u.shape.slice(),u.shape[u.shape.length-2]++,y.assert(hu(u.shape,k.shape),()=>`packed reshape ${u.shape} to ${k.shape} isn't free`);let E=te({inputs:{x:t},backend:o,attrs:{shape:[1,e.inChannels,e.outChannels]}});x.push(E);let R=Cp({a:k,b:E,backend:o,transposeA:f,transposeB:h,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a}),D=o.texData.get(R.dataId);y.assert(D.isPacked,()=>"batchMatMul result is expected to be packed"),u.shape=_,D.shape=e.outShape,g=Dt({inputs:{x:R},backend:o}),g.shape=e.outShape,x.push(R)}else{let S=e.outHeight*e.outWidth,k=te({inputs:{x:r},backend:o,attrs:{shape:d?[e.batchSize,S,e.inChannels]:[e.batchSize,e.inChannels,S]}}),_=te({inputs:{x:t},backend:o,attrs:{shape:[1,e.inChannels,e.outChannels]}}),E=Cp({a:d?k:_,b:d?_:k,transposeA:!d,transposeB:h,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a});g=te({inputs:{x:E},backend:o,attrs:{shape:e.outShape}}),x.push(k),x.push(_),x.push(E)}for(let S of x)o.disposeIntermediateTensorInfo(S);return g}function 
zh({x:r,filter:t,convInfo:e,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let{filterWidth:p,filterHeight:u,inChannels:c,outWidth:l,outHeight:m,dataFormat:d}=e,f=d==="channelsLast",h=p*u*c,g=m*l,x=[e.batchSize,h,g],b=!0,C=!1,S=[];if(s!=null){let q=Lh(s.shape,f);q!=null&&(s=te({inputs:{x:s},backend:o,attrs:{shape:q}}),S.push(s))}if(n!=null){let q=Lh(n.shape,f);q!=null&&(n=te({inputs:{x:n},backend:o,attrs:{shape:q}}),S.push(n))}let k=te({inputs:{x:t},backend:o,attrs:{shape:[1,h,y.sizeFromShape(t.shape)/h]}});S.push(k);let _=new Mh(x,e),E=[r.shape,[e.padInfo.top,e.padInfo.left],[e.strideHeight,e.strideWidth],[e.dilationHeight,e.dilationWidth],[e.inChannels],[e.filterWidth*e.inChannels],[e.outWidth]],R=o.runWebGLProgram(_,[r],"float32",E),D=te({inputs:{x:R},backend:o,attrs:{shape:x}});S.push(R),S.push(D);let P=n!=null,O=s!=null,M=i==="leakyrelu",L=i?xi(i,!0):null,B=new Lc(f?D.shape:k.shape,f?k.shape:D.shape,f?[e.batchSize,g,e.outChannels]:[e.batchSize,e.outChannels,g],b,C,P,L,O,M),z=f?[D,k]:[k,D];if(n&&z.push(n),O&&z.push(s),M){let q=o.makeTensorInfo([],"float32",y.createScalarValue(a,"float32"));z.push(q),S.push(q)}let U=o.runWebGLProgram(B,z,"float32"),j=te({inputs:{x:U},backend:o,attrs:{shape:e.outShape}});S.push(U);for(let q of S)o.disposeIntermediateTensorInfo(q);return j}function J9(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s}=t,{strides:a,pad:i,dataFormat:p,dilations:u,dimRoundingMode:c}=o,l=w.convertConv2DDataFormat(p),m=w.computeConv2DInfo(n.shape,s.shape,a,u,i,c,!1,l),d;if(m.filterHeight===1&&m.filterWidth===1&&m.dilationHeight===1&&m.dilationWidth===1&&m.strideHeight===1&&m.strideWidth===1&&(m.padInfo.type==="SAME"||m.padInfo.type==="VALID"))d=Bh({x:n,filter:s,convInfo:m,backend:e});else if(m.strideWidth<=2&&l==="channelsLast"&&A().getBool("WEBGL_EXP_CONV")){let h=new 
Vc(m),g=[[m.padInfo.top,m.padInfo.left],[m.strideHeight,m.strideWidth],[m.dilationHeight,m.dilationWidth],[m.inHeight,m.inWidth]];d=e.runWebGLProgram(h,[n,s],"float32",g)}else if(A().getBool("WEBGL_CONV_IM2COL"))d=zh({x:n,filter:s,convInfo:m,backend:e});else{let h=new zc(m);d=e.runWebGLProgram(h,[n,s],"float32")}let f=te({inputs:{x:d},backend:e,attrs:{shape:m.outShape
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int wR = coords.x;
int wC = coords.y;
int d1 = coords.z;
int d2 = coords.w;
// Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
2023-08-05 15:03:11 +02:00
for (int b = 0; b < ${t.batchSize}; b++) {
for (int yR = 0; yR < ${t.outHeight}; yR++) {
int xR = wR + yR * ${e} - ${n};
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
2023-08-05 15:03:11 +02:00
for (int yC = 0; yC < ${t.outWidth}; yC++) {
2022-11-18 17:13:29 +01:00
int xC = wC + yC * ${o} - ${s};
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
continue;
}
2023-05-08 15:12:41 +02:00
${a?`float dyValue = getDy(b, yR, yC, d2);
float xValue = getX(b, xR, xC, d1);
dotProd += (xValue * dyValue);`:`float dyValue = getDy(b, d2, yR, yC);
float xValue = getX(b, d1, xR, xC);
dotProd += (xValue * dyValue);`}
2022-11-18 17:13:29 +01:00
}
}
}
setOutput(dotProd);
}
`}},Wh=class{constructor(t){this.variableNames=["dy","W"],this.outputShape=t.inShape;let e=t.filterHeight,o=t.filterWidth,n=t.strideHeight,s=t.strideWidth,a=t.dataFormat==="channelsLast",i=e-1-t.padInfo.top,p=o-1-t.padInfo.left,u=a?1:2,c=a?2:3,l=a?3:1;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${i}, ${p});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d1 = coords[${l}];
ivec2 dyCorner = ivec2(coords[${u}], coords[${c}]) - pads;
int dyRCorner = dyCorner.x;
int dyCCorner = dyCorner.y;
// Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
2023-08-05 15:03:11 +02:00
for (int wR = 0; wR < ${e}; wR++) {
2022-11-18 17:13:29 +01:00
float dyR = float(dyRCorner + wR) / ${n}.0;
2023-08-05 15:03:11 +02:00
if (dyR < 0.0 || dyR >= ${t.outHeight}.0 || fract(dyR) > 0.0) {
2022-11-18 17:13:29 +01:00
continue;
}
int idyR = int(dyR);
2023-08-05 15:03:11 +02:00
int wRPerm = ${e} - 1 - wR;
2022-11-18 17:13:29 +01:00
for (int wC = 0; wC < ${o}; wC++) {
float dyC = float(dyCCorner + wC) / ${s}.0;
2023-08-05 15:03:11 +02:00
if (dyC < 0.0 || dyC >= ${t.outWidth}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${o} - 1 - wC;
2023-08-05 15:03:11 +02:00
for (int d2 = 0; d2 < ${t.outChannels}; d2++) {
2022-11-18 17:13:29 +01:00
if (${a}) {
float xValue = getDy(batch, idyR, idyC, d2);
float wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
} else {
float xValue = getDy(batch, d2, idyR, idyC);
float wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
}
}
}
}
setOutput(dotProd);
}
`}},Uh=class{constructor(t){this.variableNames=["x","dy"],this.outputShape=t.filterShape;let e=t.strideDepth,o=t.strideHeight,n=t.strideWidth,s=t.padInfo.front,a=t.padInfo.top,i=t.padInfo.left;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int wF = coords.x;
int wR = coords.y;
int wC = coords.z;
int d1 = coords.w;
int d2 = coords.u;
float dotProd = 0.0;
2023-08-05 15:03:11 +02:00
for (int b = 0; b < ${t.batchSize}; b++) {
for (int yF = 0; yF < ${t.outDepth}; yF++) {
int xF = wF + yF * ${e} - ${s};
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
if (xF < 0 || xF >= ${t.inDepth}) {
2022-11-18 17:13:29 +01:00
continue;
}
2023-08-05 15:03:11 +02:00
for (int yR = 0; yR < ${t.outHeight}; yR++) {
2022-11-18 17:13:29 +01:00
int xR = wR + yR * ${o} - ${a};
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
2023-08-05 15:03:11 +02:00
for (int yC = 0; yC < ${t.outWidth}; yC++) {
2022-11-18 17:13:29 +01:00
int xC = wC + yC * ${n} - ${i};
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
continue;
}
float dyValue = getDy(b, yF, yR, yC, d2);
float xValue = getX(b, xF, xR, xC, d1);
dotProd += (xValue * dyValue);
}
}
}
}
setOutput(dotProd);
}
`}},Gh=class{constructor(t){this.variableNames=["dy","W"],this.outputShape=t.inShape;let e=t.filterDepth,o=t.filterHeight,n=t.filterWidth,s=t.strideDepth,a=t.strideHeight,i=t.strideWidth,p=e-1-t.padInfo.front,u=o-1-t.padInfo.top,c=n-1-t.padInfo.left;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec3 pads = ivec3(${p}, ${u}, ${c});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int d1 = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyFCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
float dotProd = 0.0;
2023-08-05 15:03:11 +02:00
for (int wF = 0; wF < ${e}; wF++) {
2022-11-18 17:13:29 +01:00
float dyF = float(dyFCorner + wF) / ${s}.0;
2023-08-05 15:03:11 +02:00
if (dyF < 0.0 || dyF >= ${t.outDepth}.0 || fract(dyF) > 0.0) {
2022-11-18 17:13:29 +01:00
continue;
}
int idyF = int(dyF);
2023-08-05 15:03:11 +02:00
int wFPerm = ${e} - 1 - wF;
2022-11-18 17:13:29 +01:00
for (int wR = 0; wR < ${o}; wR++) {
float dyR = float(dyRCorner + wR) / ${a}.0;
2023-08-05 15:03:11 +02:00
if (dyR < 0.0 || dyR >= ${t.outHeight}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
int wRPerm = ${o} - 1 - wR;
for (int wC = 0; wC < ${n}; wC++) {
float dyC = float(dyCCorner + wC) / ${i}.0;
2023-08-05 15:03:11 +02:00
if (dyC < 0.0 || dyC >= ${t.outWidth}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${n} - 1 - wC;
2023-08-05 15:03:11 +02:00
for (int d2 = 0; d2 < ${t.outChannels}; d2++) {
2022-11-18 17:13:29 +01:00
float xValue = getDy(batch, idyF, idyR, idyC, d2);
float wValue = getW(wFPerm, wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
}
}
}
}
setOutput(dotProd);
}
`}};function eJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,dy:s}=t,{strides:a,pad:i,dataFormat:p,dimRoundingMode:u,filterShape:c}=o,l=w.convertConv2DDataFormat(p),m=w.computeConv2DInfo(n.shape,c,a,1,i,u,!1,l),d=new Vh(m);return e.runWebGLProgram(d,[n,s],"float32")}var LA={kernelName:Ai,backendName:"webgl",kernelFunc:eJ};var Hh=class{constructor(t){this.variableNames=["dy","W"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"strides",type:"vec2"}],this.outputShape=t.inShape,this.enableShapeUniforms=ut(this.outputShape.length);let e=t.filterHeight,o=t.filterWidth,n=e-1-t.padInfo.top,s=o-1-t.padInfo.left;this.userCode=`
2023-05-08 15:12:41 +02:00
const ivec2 pads = ivec2(${n}, ${s});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d1 = coords[3];
ivec2 dyCorner = ivec2(coords[1], coords[2]) - pads;
int dyRCorner = dyCorner.x;
int dyCCorner = dyCorner.y;
vec4 result = vec4(0.);
2023-08-05 15:03:11 +02:00
for (int wR = 0; wR < ${e}; wR++) {
2023-05-08 15:12:41 +02:00
float dyR = float(dyRCorner + wR) / strides[0];
2023-08-05 15:03:11 +02:00
if (dyR < 0.0 || dyR >= ${t.outHeight}.0 || fract(dyR) > 0.0) {
2023-05-08 15:12:41 +02:00
continue;
}
int idyR = int(dyR);
2023-08-05 15:03:11 +02:00
int wRPerm = ${e} - 1 - wR;
2023-05-08 15:12:41 +02:00
for (int wC = 0; wC < ${o}; wC++) {
int wCPerm = ${o} - 1 - wC;
float dyC = float(dyCCorner + wC) / strides[1];
2023-08-05 15:03:11 +02:00
bool idyCVal = (dyC >= 0.0) && (dyC < ${t.outWidth}.0)
2023-05-08 15:12:41 +02:00
&& (fract(dyC) == 0.0);
int idyC = int(dyC);
float dyC2 = float(dyCCorner + wC + 1) / strides[1];
2023-08-05 15:03:11 +02:00
bool idyCVal2 = (dyC2 >= 0.0) && (dyC2 < ${t.outWidth}.0)
2023-05-08 15:12:41 +02:00
&& (fract(dyC2) == 0.0);
int idyC2 = int(dyC2);
if (idyCVal && idyCVal2) {
2023-08-05 15:03:11 +02:00
for (int d2 = 0; d2 < ${t.outChannels}; d2 += 2) {
2023-05-08 15:12:41 +02:00
vec4 wValue = getW(wRPerm, wCPerm, d1, d2);
vec4 dySample = getDy(batch, idyR, idyC, d2);
vec4 dySample2 = (idyC / 2 == idyC2 / 2) ?
dySample : getDy(batch, idyR, idyC2, d2);
vec2 dyValue = mod(float(idyC), 2.) == 0. ?
dySample.xy : dySample.zw;
result.xy += vec2(dot(dyValue, wValue.xy),
dot(dyValue, wValue.zw));
dyValue = mod(float(idyC2), 2.) == 0. ?
dySample2.xy : dySample2.zw;
result.zw += vec2(dot(dyValue, wValue.xy),
dot(dyValue, wValue.zw));
}
} else if (idyCVal) {
2023-08-05 15:03:11 +02:00
for (int d2 = 0; d2 < ${t.outChannels}; d2 += 2) {
2023-05-08 15:12:41 +02:00
vec4 wValue = getW(wRPerm, wCPerm, d1, d2);
vec4 dySample = getDy(batch, idyR, idyC, d2);
vec2 dyValue = mod(float(idyC), 2.) == 0. ?
dySample.xy : dySample.zw;
result.xy += vec2(dot(dyValue, wValue.xy),
dot(dyValue, wValue.zw));
}
} else if (idyCVal2) {
2023-08-05 15:03:11 +02:00
for (int d2 = 0; d2 < ${t.outChannels}; d2 += 2) {
2023-05-08 15:12:41 +02:00
vec4 wValue = getW(wRPerm, wCPerm, d1, d2);
vec4 dySample = getDy(batch, idyR, idyC2, d2);
vec2 dyValue = mod(float(idyC2), 2.) == 0. ?
dySample.xy : dySample.zw;
result.zw += vec2(dot(dyValue, wValue.xy),
dot(dyValue, wValue.zw));
}
}
}
}
setOutput(result);
}
`}};function tJ(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,filter:s}=t,{inputShape:a,strides:i,pad:p,dataFormat:u,dimRoundingMode:c}=o,l=w.convertConv2DDataFormat(u),m=w.computeConv2DInfo(a,s.shape,i,1,p,c,!1,l);if(A().getBool("WEBGL_PACK_CONV2DTRANSPOSE")&&l==="channelsLast"){let d=[[m.strideHeight,m.strideWidth]],f=new Hh(m);return e.runWebGLProgram(f,[n,s],"float32",d)}else{let d=new Wh(m);return e.runWebGLProgram(d,[n,s],"float32")}}var BA={kernelName:rn,backendName:"webgl",kernelFunc:tJ};function rJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s}=t,{strides:a,pad:i,dilations:p}=o,u=w.computeConv3DInfo(n.shape,s.shape,a,p,i),c=new Oh(u);return e.runWebGLProgram(c,[n,s],"float32")}var zA={kernelName:on,backendName:"webgl",kernelFunc:rJ};function oJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,dy:s}=t,{strides:a,pad:i,filterShape:p}=o,u=w.computeConv3DInfo(n.shape,p,a,1,i),c=new Uh(u);return e.runWebGLProgram(c,[n,s],"float32")}var VA={kernelName:ja,backendName:"webgl",kernelFunc:oJ};function nJ(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,filter:s}=t,{pad:a,strides:i,inputShape:p}=o,u=w.computeConv3DInfo(p,s.shape,i,1,a),c=new Gh(u);return e.runWebGLProgram(c,[n,s],"float32")}var WA={kernelName:nn,backendName:"webgl",kernelFunc:nJ};var sJ=Fo+`
2022-11-18 17:13:29 +01:00
return cos(x);
2023-08-05 15:03:11 +02:00
`,aJ=`
2023-05-08 15:12:41 +02:00
vec4 result = cos(x);
bvec4 isNaN = isnan(x);
${jr}
2023-05-08 15:12:41 +02:00
return result;
`,iJ=xe({opSnippet:sJ,packedOpSnippet:aJ}),UA={kernelName:sn,backendName:"webgl",kernelFunc:iJ};var uJ=`
2022-11-18 17:13:29 +01:00
float e2x = exp(-x);
return (e2x + 1.0 / e2x) / 2.0;
`,pJ=xe({opSnippet:uJ}),GA={kernelName:an,backendName:"webgl",kernelFunc:pJ};var Kh=class{constructor(t,e,o,n,s){this.variableNames=["Image","Boxes","BoxInd"],this.outputShape=[];let[a,i,p,u]=t,[c]=e,[l,m]=o;this.outputShape=[c,l,m,u];let d=n==="bilinear"?1:0,[f,h]=[`${i-1}.0`,`${p-1}.0`],[g,x,b]=l>1?[`${(i-1)/(l-1)}`,"(y2-y1) * height_ratio",`y1*${f} + float(y)*(height_scale)`]:["0.0","0.0",`0.5 * (y1+y2) * ${f}`],[C,S,k]=m>1?[`${(p-1)/(m-1)}`,"(x2-x1) * width_ratio",`x1*${h} + float(x)*(width_scale)`]:["0.0","0.0",`0.5 * (x1+x2) * ${h}`];this.userCode=`
2022-11-18 17:13:29 +01:00
const float height_ratio = float(${g});
const float width_ratio = float(${C});
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int y = coords[1];
int x = coords[2];
int d = coords[3];
// get box vals
float y1 = getBoxes(b,0);
float x1 = getBoxes(b,1);
float y2 = getBoxes(b,2);
float x2 = getBoxes(b,3);
// get image in batch index
int bInd = round(getBoxInd(b));
if(bInd < 0 || bInd >= ${a}) {
return;
}
2022-11-20 22:20:02 +01:00
float height_scale = ${x};
2023-05-08 15:12:41 +02:00
float width_scale = ${S};
2022-11-18 17:13:29 +01:00
float in_y = ${b};
2022-11-20 22:20:02 +01:00
if( in_y < 0.0 || in_y > ${f} ) {
2022-11-18 17:13:29 +01:00
setOutput(float(${s}));
return;
}
float in_x = ${k};
if( in_x < 0.0 || in_x > ${h} ) {
setOutput(float(${s}));
return;
}
vec2 sourceFracIndexCR = vec2(in_x,in_y);
2022-11-20 22:20:02 +01:00
if(${d} == 1) {
2022-11-18 17:13:29 +01:00
// Compute the four integer indices.
ivec2 sourceFloorCR = ivec2(sourceFracIndexCR);
ivec2 sourceCeilCR = ivec2(ceil(sourceFracIndexCR));
float topLeft = getImage(b, sourceFloorCR.y, sourceFloorCR.x, d);
float bottomLeft = getImage(b, sourceCeilCR.y, sourceFloorCR.x, d);
float topRight = getImage(b, sourceFloorCR.y, sourceCeilCR.x, d);
float bottomRight = getImage(b, sourceCeilCR.y, sourceCeilCR.x, d);
vec2 fracCR = sourceFracIndexCR - vec2(sourceFloorCR);
float top = topLeft + (topRight - topLeft) * fracCR.x;
float bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR.x;
float newValue = top + (bottom - top) * fracCR.y;
setOutput(newValue);
} else {
// Compute the coordinators of nearest neighbor point.
ivec2 sourceNearestCR = ivec2(floor(
sourceFracIndexCR + vec2(0.5,0.5)));
float newValue = getImage(b, sourceNearestCR.y, sourceNearestCR.x, d);
setOutput(newValue);
}
}
`}};var cJ=r=>{let{inputs:t,backend:e,attrs:o}=r,{image:n,boxes:s,boxInd:a}=t,{cropSize:i,method:p,extrapolationValue:u}=o,c=new Kh(n.shape,s.shape,i,p,u);return e.runWebGLProgram(c,[n,s,a],"float32")},HA={kernelName:cn,backendName:"webgl",kernelFunc:cJ};var Sp;(function(r){r.Prod="*",r.Sum="+"})(Sp||(Sp={}));var tm=class{constructor(t,e,o,n){this.op=t,this.outputShape=e,this.variableNames=["x"],this.customUniforms=[{name:"index",type:"float"}];let s=this.outputShape.length,a=this.op===Sp.Prod?"1.0":"0.0",i=o?a:`getX(${KA(s,"coords",this.op)})`,p=this.outputShape[this.outputShape.length-1],u="",c="";o?(u=n?`end != ${p-1}`:"end != 0",c=n?"end + 1":"end - 1"):(u=n?`end + pow2 < ${p}`:"end >= pow2",c=n?"end + pow2":"end - pow2"),this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
2023-05-08 15:12:41 +02:00
${Re(s)} coords = getOutputCoords();
int end = ${qA(s,"coords",this.op)};
2022-11-18 17:13:29 +01:00
float val = ${i};
int pow2 = int(pow(2.0, index));
if (${u}) {
int idx = ${c};
${qA(s,"coords",this.op)} = idx;
val ${this.op}= getX(${KA(s,"coords",this.op)});
2022-11-18 17:13:29 +01:00
}
setOutput(val);
}
`}};function KA(r,t,e){if(r===1)return`${t}`;if(r===2)return`${t}.x, ${t}.y`;if(r===3)return`${t}.x, ${t}.y, ${t}.z`;if(r===4)return`${t}.x, ${t}.y, ${t}.z, ${t}.w`;throw new Error(`Cumulative ${e} for rank ${r} is not yet supported`)}function qA(r,t,e){if(r===1)return`${t}`;if(r===2)return`${t}.y`;if(r===3)return`${t}.z`;if(r===4)return`${t}.w`;throw new Error(`Cumulative ${e} for rank ${r} is not yet supported`)}function qh(r,t,e,o,n,s){let a=t.shape.length,i=w.getAxesPermutation([o],a),p=t;i!=null&&(p=bt({inputs:{x:t},backend:e,attrs:{perm:i}}));let u=w.getInnerMostAxes(1,a)[0];if(u!==a-1)throw new Error(`WebGL cumprod shader expects an inner-most axis=${t.shape.length-1} but got axis=${o}`);let c=p.shape[u],l=Dt({inputs:{x:p},backend:e});for(let m=0;m<=Math.ceil(Math.log2(c))-1;m++){let d=new tm(r,p.shape,!1,s),f=[[m]],h=l;l=e.runWebGLProgram(d,[l],l.dtype,f),e.disposeIntermediateTensorInfo(h)}if(n){let m=new tm(r,p.shape,n,s),d=l;l=e.runWebGLProgram(m,[l],l.dtype),e.disposeIntermediateTensorInfo(d)}if(i!=null){let m=w.getUndoAxesPermutation(i),d=bt({inputs:{x:l},backend:e,attrs:{perm:m}});return e.disposeIntermediateTensorInfo(l),e.disposeIntermediateTensorInfo(p),d}return l}function lJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,exclusive:a,reverse:i}=o;return qh(Sp.Prod,n,e,s,a,i)}var jA={kernelName:un,backendName:"webgl",kernelFunc:lJ};function mJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,exclusive:a,reverse:i}=o;return qh(Sp.Sum,n,e,s,a,i)}var XA={kernelName:pn,backendName:"webgl",kernelFunc:mJ};function dJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,weights:s}=t,{size:a,binaryOutput:i}=o;if(n.shape.length===1){let p=e.readSync(n.dataId),u=e.readSync(s.dataId),c=uh(p,u,s.dtype,s.shape,a);return e.makeTensorInfo([a],s.dtype,c)}else if(n.shape.length===2){let p=e.bufferSync(n),u=e.bufferSync(s),c=AR(p,u,a,i);return e.makeTensorInfo(c.shape,s.dtype,c.values)}throw new Error(`Error in denseBincount: input must be at most rank 2, but got 
rank${n.shape.length}.`)}var YA={kernelName:ra,backendName:"webgl",kernelFunc:dJ};var jh=class{constructor(t,e,o){this.variableNames=["x"],this.outputShape=[],this.outputShape=t,this.blockSize=e,this.dataFormat=o,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int h = ${this.getHeightCoordString()};
int w = ${this.getWidthCoordString()};
int d = ${this.getDepthCoordString()};
2023-08-05 15:03:11 +02:00
int in_h = h / ${e};
int offset_h = imod(h, ${e});
int in_w = w / ${e};
int offset_w = imod(w, ${e});
int offset_d = (offset_h * ${e} + offset_w) *
2022-11-18 17:13:29 +01:00
${this.getOutputDepthSize()};
int in_d = d + offset_d;
float result = ${this.getInputSamplingString()};
setOutput(result);
}
`}getHeightCoordString(){return this.dataFormat==="NHWC"?"coords[1]":"coords[2]"}getWidthCoordString(){return this.dataFormat==="NHWC"?"coords[2]":"coords[3]"}getDepthCoordString(){return this.dataFormat==="NHWC"?"coords[3]":"coords[1]"}getOutputDepthSize(){return this.dataFormat==="NHWC"?this.outputShape[3]:this.outputShape[1]}getInputSamplingString(){return this.dataFormat==="NHWC"?"getX(b, in_h, in_w, in_d)":"getX(b, in_d, in_h, in_w)"}};function fJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{blockSize:s,dataFormat:a}=o,i=n.shape[0],p=a==="NHWC"?n.shape[1]:n.shape[2],u=a==="NHWC"?n.shape[2]:n.shape[3],c=a==="NHWC"?n.shape[3]:n.shape[1],l=p*s,m=u*s,d=c/(s*s),f=a==="NHWC"?[i,l,m,d]:[i,d,l,m],h=new jh(f,s,a);return e.runWebGLProgram(h,[n],n.dtype)}var QA={kernelName:ln,backendName:"webgl",kernelFunc:fJ};var Wc=class{constructor(t,e=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=t.outShape,this.enableShapeUniforms=ut(this.outputShape.length);let a=t.filterHeight,i=t.filterWidth,p=t.outChannels/t.inChannels,u="",c="";o&&(n?u=`float activation(float a) {
2022-11-18 17:13:29 +01:00
float b = getPreluActivationWeightsAtOutCoords();
${o}
}`:s?u=`float activation(float a) {
float b = getLeakyreluAlphaAtOutCoords();
${o}
}`:u=`
float activation(float x) {
${o}
}
2023-08-05 15:03:11 +02:00
`,c="result = activation(result);");let l=e?"result += getBiasAtOutCoords();":"";e&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
2022-11-18 17:13:29 +01:00
${u}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int d1 = d2 / ${p};
int q = d2 - d1 * ${p};
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// Convolve x(?, ?, d1) with w(:, :, d1, q) to get y(yR, yC, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
// TO DO(dsmilkov): Flatten the two for loops and vec4 the operations.
for (int wR = 0; wR < ${a}; wR++) {
int xR = xRCorner + wR * dilations[0];
if (xR < 0 || xR >= inDims[0]) {
continue;
}
for (int wC = 0; wC < ${i}; wC++) {
int xC = xCCorner + wC * dilations[1];
if (xC < 0 || xC >= inDims[1]) {
continue;
}
float xVal = getX(batch, xR, xC, d1);
float wVal = getW(wR, wC, d1, q);
dotProd += xVal * wVal;
}
}
float result = dotProd;
${l}
${c}
setOutput(result);
}
`}};var Uc=class{constructor(t,e=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=t.outShape,this.enableShapeUniforms=ut(this.outputShape.length);let a=t.outChannels/t.inChannels,i=t.padInfo.left,p=t.strideWidth,u=t.dilationWidth,c=t.filterHeight,l=t.filterWidth,m=l,d=`
2022-11-18 17:13:29 +01:00
int xR; int xC; int xCOffset;
2022-11-20 22:20:02 +01:00
vec4 wTexel; vec4 previous; vec4 final;`;for(let x=0;x<l;x++)d+=`
vec4 xTexelC${x*2};
int xTexelC${x*2}Ready;
vec4 xTexelC${x*2+1};
int xTexelC${x*2+1}Ready;
vec4 xC${x};`;d+=`
2022-11-18 17:13:29 +01:00
for (int r = 0; r < ${c}; r++) {
2022-11-20 22:20:02 +01:00
`;for(let x=0;x<l;x++)d+=`
xTexelC${x*2} = vec4(0.0);
xTexelC${x*2}Ready = 0;
xTexelC${x*2+1} = vec4(0.0);
xTexelC${x*2+1}Ready = 0;
xC${x} = vec4(0.0);`;d+=`
2022-11-18 17:13:29 +01:00
xR = xRCorner + r * dilations[0];
if (xR >=0 && xR < inDims[0]) {
2022-11-20 22:20:02 +01:00
`;for(let x=0;x<(m+1)/2;x++){let b=x*2;if(d+=`
2022-11-18 17:13:29 +01:00
xC = xCCorner + ${b*u};
2022-11-20 22:20:02 +01:00
`,p===1){if(b<l&&(i%2===1?(d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1;
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
2022-11-20 22:20:02 +01:00
`,u===1&&b>0?d+=`
2022-11-18 17:13:29 +01:00
xC${b} = vec4(xTexelC${b-2}.zw, xTexelC${b}.xy);
2022-11-20 22:20:02 +01:00
`:d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1 - 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
previous.zw = vec2(0.0);
}
xC${b} = vec4(previous.zw, xTexelC${b}.xy);
} else {
xC${b} = vec4(0.0, 0.0, xTexelC${b}.xy);
}
2022-11-20 22:20:02 +01:00
`):d+=`
2022-11-18 17:13:29 +01:00
if (xC >= 0 && xC < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
xC${b} = xTexelC${b};
`,b+1<l)){let C=i%2===0?y.nearestLargerEven(u):u;u%2===0&&i%2===1||u%2!==0&&i%2!==1?(d+=`
xCOffset = xC + imod(pads[1], 2) + ${C};
2022-11-18 17:13:29 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
2022-11-20 22:20:02 +01:00
`,u>1?d+=`
2022-11-18 17:13:29 +01:00
xCOffset -= 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
xC${b+1} = vec4(previous.zw, xTexelC${b+1}.xy);
} else {
xC${b+1} = vec4(0.0, 0.0, xTexelC${b+1}.xy);
}
2022-11-20 22:20:02 +01:00
`:d+=`
2022-11-18 17:13:29 +01:00
xC${b+1} = vec4(xTexelC${b}.zw, xTexelC${b+1}.xy);
`):C===1?d+=`
2022-11-18 17:13:29 +01:00
xC${b+1} = xTexelC${b};
2022-11-20 22:20:02 +01:00
`:d+=`
xCOffset = xC + ${C};
2022-11-18 17:13:29 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
xC${b+1} = xTexelC${b+1};
2022-11-20 22:20:02 +01:00
`}}else b<l&&(i%2===1?(d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1 - strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xC + 1, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xC + 2 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
xC${b} = vec4(xTexelC${b}.zw, xTexelC${b+1}.zw);
2022-11-20 22:20:02 +01:00
`,b+1<l&&(d+=`
2022-11-18 17:13:29 +01:00
final = vec4(0.0);
xCOffset = xC + 1 + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1]) {
final = getX(batch, xR, xCOffset, d1);
}
xC${b+1} = vec4(xTexelC${b+1}.xy, final.xy);
2022-11-20 22:20:02 +01:00
`)):(d+=`
2022-11-18 17:13:29 +01:00
if(xC >= 0 && xC < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
xCOffset = xC + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.);
}
xTexelC${b+1}Ready = 1;
}
xC${b} = vec4(
xTexelC${b}.xy, xTexelC${b+1}.xy);
2022-11-20 22:20:02 +01:00
`,b+1<l&&(d+=`
2022-11-18 17:13:29 +01:00
xC${b+1} = vec4(xTexelC${b}.zw, xTexelC${b+1}.zw);
2022-11-20 22:20:02 +01:00
`)));b<l&&(d+=`
2022-11-18 17:13:29 +01:00
wTexel = getW(r, ${b}, d1, q);
dotProd += xC${b} * vec4(wTexel.xz, wTexel.xz);
2022-11-20 22:20:02 +01:00
`,b+1<l&&(d+=`
2022-11-18 17:13:29 +01:00
wTexel = getW(r, ${b+1}, d1, q);
dotProd += xC${b+1} * vec4(wTexel.xz, wTexel.xz);
2022-11-20 22:20:02 +01:00
`))}d+=`
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`,d+=`
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let f="",h="";o&&(n?f=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getPreluActivationWeightsAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:s?f=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getLeakyreluAlphaAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:f=`vec4 activation(vec4 x) {
2022-11-18 17:13:29 +01:00
${o}
2023-08-05 15:03:11 +02:00
}`,h="result = activation(result);");let g=e?"result += getBiasAtOutCoords();":"";e&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int d1 = d2 / ${a};
int q = d2 - d1 * ${a};
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
//intialize dotProd with a small epsilon seems to reduce GPU accuracy loss.
vec4 dotProd = vec4(0.000000000000001);
2022-11-20 22:20:02 +01:00
${d}
2022-11-18 17:13:29 +01:00
vec4 result = dotProd - vec4(0.000000000000001);
${g}
${h}
setOutput(result);
}
`}};function hJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s}=t,{strides:a,pad:i,dilations:p,dimRoundingMode:u}=o,c=p;c==null&&(c=[1,1]),y.assert(w.eitherStridesOrDilationsAreOne(a,c),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${a} and dilations '${c}'`);let l=w.computeConv2DInfo(n.shape,s.shape,a,c,i,u,!0),m;A().getBool("WEBGL_PACK_DEPTHWISECONV")&&l.strideWidth<=2&&l.outChannels/l.inChannels===1?m=new Uc(l):m=new Wc(l);let d=[[l.padInfo.top,l.padInfo.left],[l.strideHeight,l.strideWidth],[l.dilationHeight,l.dilationWidth],[l.inHeight,l.inWidth]];return e.runWebGLProgram(m,[n,s],"float32",d)}var ZA={kernelName:mn,backendName:"webgl",kernelFunc:hJ};var Xh=class{constructor(t){this.variableNames=["x","dy"],this.outputShape=t.filterShape;let e=t.strideHeight,o=t.strideWidth,n=t.padInfo.top,s=t.padInfo.left,a=t.outChannels/t.inChannels;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int wR = coords.x;
int wC = coords.y;
int d1 = coords.z;
int dm = coords.w;
int d2 = d1 * ${a} + dm;
float dotProd = 0.0;
// TO DO: Vec4 over the batch size
2023-08-05 15:03:11 +02:00
for (int b = 0; b < ${t.batchSize}; b++) {
for (int yR = 0; yR < ${t.outHeight}; yR++) {
int xR = wR + yR * ${e} - ${n};
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
if (xR < 0 || xR >= ${t.inHeight}) {
2022-11-18 17:13:29 +01:00
continue;
}
2023-08-05 15:03:11 +02:00
for (int yC = 0; yC < ${t.outWidth}; yC++) {
2022-11-18 17:13:29 +01:00
int xC = wC + yC * ${o} - ${s};
2023-08-05 15:03:11 +02:00
if (xC < 0 || xC >= ${t.inWidth}) {
2022-11-18 17:13:29 +01:00
continue;
}
float dyValue = getDy(b, yR, yC, d2);
float xValue = getX(b, xR, xC, d1);
dotProd += (xValue * dyValue);
}
}
}
setOutput(dotProd);
}
`}},Yh=class{constructor(t){this.variableNames=["dy","W"],this.outputShape=t.inShape;let e=t.filterHeight,o=t.filterWidth,n=t.strideHeight,s=t.strideWidth,a=e-1-t.padInfo.top,i=o-1-t.padInfo.left,p=t.outChannels/t.inChannels;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${a}, ${i});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d1 = coords[3];
ivec2 dyCorner = coords.yz - pads;
int dyRCorner = dyCorner.x;
int dyCCorner = dyCorner.y;
float dotProd = 0.0;
2023-08-05 15:03:11 +02:00
for (int wR = 0; wR < ${e}; wR++) {
2022-11-18 17:13:29 +01:00
float dyR = float(dyRCorner + wR) / ${n}.0;
2023-08-05 15:03:11 +02:00
if (dyR < 0.0 || dyR >= ${t.outHeight}.0 || fract(dyR) > 0.0) {
2022-11-18 17:13:29 +01:00
continue;
}
int idyR = int(dyR);
2023-08-05 15:03:11 +02:00
int wRPerm = ${e} - 1 - wR;
2022-11-18 17:13:29 +01:00
for (int wC = 0; wC < ${o}; wC++) {
float dyC = float(dyCCorner + wC) / ${s}.0;
2023-08-05 15:03:11 +02:00
if (dyC < 0.0 || dyC >= ${t.outWidth}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${o} - 1 - wC;
// TO DO: Vec4 over the channelMul
for (int dm = 0; dm < ${p}; dm++) {
int d2 = d1 * ${p} + dm;
float xValue = getDy(batch, idyR, idyC, d2);
float wValue = getW(wRPerm, wCPerm, d1, dm);
dotProd += xValue * wValue;
}
}
}
setOutput(dotProd);
}
`}};function gJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,dy:s}=t,{strides:a,dilations:i,pad:p,dimRoundingMode:u,filterShape:c}=o,l=w.computeConv2DInfo(n.shape,c,a,i,p,u,!0),m=new Xh(l);return e.runWebGLProgram(m,[n,s],"float32")}var JA={kernelName:Fi,backendName:"webgl",kernelFunc:gJ};function xJ(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,filter:s}=t,{strides:a,dilations:i,pad:p,dimRoundingMode:u,inputShape:c}=o,l=w.computeConv2DInfo(c,s.shape,a,i,p,u,!0),m=new Yh(l);return e.runWebGLProgram(m,[n,s],"float32")}var eF={kernelName:Pi,backendName:"webgl",kernelFunc:xJ};var Qh=class{constructor(t){this.variableNames=["X"],this.outputShape=[t,t],this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
float val = coords[0] == coords[1] ? getX(coords[0]) : 0.0;
setOutput(val);
}
`}};function yJ(r){let{inputs:t,backend:e}=r,{x:o}=t,n=[...o.shape,...o.shape],s=y.sizeFromShape(o.shape),a=te({inputs:{x:o},backend:e,attrs:{shape:[s]}}),i=new Qh(s),p=e.runWebGLProgram(i,[a],a.dtype),u=te({inputs:{x:p},backend:e,attrs:{shape:n}});return e.disposeIntermediateTensorInfo(a),e.disposeIntermediateTensorInfo(p),u}var tF={kernelName:oa,backendName:"webgl",kernelFunc:yJ};var Zh=class{constructor(t){this.variableNames=["x","W"],this.outputShape=t.outShape;let{inHeight:e,inWidth:o,padInfo:n,strideHeight:s,strideWidth:a,filterHeight:i,filterWidth:p,dilationHeight:u,dilationWidth:c}=t,{top:l,left:m}=n;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 strides = ivec2(${s}, ${a});
const ivec2 pads = ivec2(${l}, ${m});
const float neg_infinity = -3.4e38;
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
int d1 = coords.w;
ivec2 outTopLeftCorner =
coords.yz * strides - pads;
int hBeg = outTopLeftCorner.x;
int wBeg = outTopLeftCorner.y;
float curVal = neg_infinity;
for (int h = 0; h < ${i}; h++) {
int hIn = hBeg + h * ${u};
2023-08-05 15:03:11 +02:00
if (hIn >= 0 && hIn < ${e}) {
2022-11-18 17:13:29 +01:00
for (int w = 0; w < ${p}; w++) {
int wIn = wBeg + w * ${c};
if (wIn >= 0 && wIn < ${o}) {
float xVal = getX(batch, hIn, wIn, d1);
float wVal = getW(h, w, d1);
float val = xVal + wVal;
if (val > curVal) {
curVal = val;
}
}
}
}
}
float result = curVal;
setOutput(result);
}
`}};function bJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s}=t,{strides:a,pad:i,dilations:p}=o,u=w.computeDilation2DInfo(n.shape,s.shape,a,i,"NHWC",p),c,l=new Zh(u);c=e.runWebGLProgram(l,[n,s],"float32");let m=te({inputs:{x:c},backend:e,attrs:{shape:u.outShape}});return e.disposeIntermediateTensorInfo(c),m}var rF={kernelName:dn,backendName:"webgl",kernelFunc:bJ};function CJ(r){let{inputs:t,backend:e,attrs:o}=r,{equation:n}=o,s=t,{allDims:a,summedDims:i,idDims:p}=w.decodeEinsumEquation(n,s.length);w.checkEinsumDimSizes(a.length,p,s);let{path:u,steps:c}=w.getEinsumComputePath(i,p),l=c.length,m=null,d=a.length,f=[];for(let h=0;h<l;++h){for(let g of c[h]){let{permutationIndices:x,expandDims:b}=w.getEinsumPermutation(d,p[g]),C;w.isIdentityPermutation(x)?C=s[g]:(C=bt({inputs:{x:s[g]},backend:e,attrs:{perm:x}}),f.push(C));let S=C.shape.slice();for(let k=0;k<b.length;++k)S.splice(b[k],0,1);y.arraysEqual(C.shape,S)||(C=te({inputs:{x:C},backend:e,attrs:{shape:S}}),f.push(C)),m===null?m=C:(m=Jl({inputs:{a:C,b:m},backend:e}),f.push(m))}h<l-1&&(u[h]>=0&&(m=bp({inputs:{x:m},backend:e,attrs:{axis:u[h]-(a.length-d),keepDims:!1}}),f.push(m)),d--)}for(let h of f)h!==m&&e.disposeIntermediateTensorInfo(h);return m}var oF={kernelName:Li,backendName:"webgl",kernelFunc:CJ};var wJ="return (x >= 0.0) ? x : (exp(x) - 1.0);",SJ=`
2022-11-18 17:13:29 +01:00
vec4 result;
result.r = (x.r >= 0.0) ? x.r : (exp(x.r) - 1.0);
result.g = (x.g >= 0.0) ? x.g : (exp(x.g) - 1.0);
result.b = (x.b >= 0.0) ? x.b : (exp(x.b) - 1.0);
result.a = (x.a >= 0.0) ? x.a : (exp(x.a) - 1.0);
return result;
`,IJ=xe({opSnippet:wJ,packedOpSnippet:SJ}),nF={kernelName:hn,backendName:"webgl",kernelFunc:IJ};var vJ="return (b >= 0.0) ? a : a * (b + 1.0);",kJ=`
2022-11-18 17:13:29 +01:00
vec4 bGTEZero = vec4(greaterThanEqual(b, vec4(0.)));
return (bGTEZero * a) + ((vec4(1.0) - bGTEZero) * (a * (b + vec4(1.0))));
`,NJ=r=>{let{inputs:t,backend:e}=r,{dy:o,y:n}=t,s=A().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new qr(kJ,o.shape,n.shape):new Fr(vJ,o.shape,n.shape);return e.runWebGLProgram(s,[o,n],o.dtype)},sF={kernelName:Xa,backendName:"webgl",kernelFunc:NJ};var TJ=`
2022-11-18 17:13:29 +01:00
return vec4(equal(a, b));
`,_J="return float(a == b);",$J=nt({opSnippet:_J,packedOpSnippet:TJ,dtype:"bool",cpuKernelImpl:LR}),aF={kernelName:xn,backendName:"webgl",kernelFunc:$J};var EJ=`
2022-11-18 17:13:29 +01:00
// Error function is calculated approximately with elementary function.
// See "Handbook of Mathematical Functions with Formulas,
// Graphs, and Mathematical Tables", Abramowitz and Stegun.
float p = ${w.ERF_P};
float a1 = ${w.ERF_A1};
float a2 = ${w.ERF_A2};
float a3 = ${w.ERF_A3};
float a4 = ${w.ERF_A4};
float a5 = ${w.ERF_A5};
2022-11-18 17:13:29 +01:00
float sign = sign(x);
x = abs(x);
float t = 1.0 / (1.0 + p * x);
return sign * (1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x));
`,RJ=xe({opSnippet:EJ}),iF={kernelName:gn,backendName:"webgl",kernelFunc:RJ};var DJ=Fo+`
2022-11-18 17:13:29 +01:00
return exp(x);
2023-08-05 15:03:11 +02:00
`,AJ=`
2022-11-18 17:13:29 +01:00
vec4 result = exp(x);
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,vv=xe({opSnippet:DJ,packedOpSnippet:AJ,cpuKernelImpl:BR,dtype:"float32"}),uF={kernelName:yn,backendName:"webgl",kernelFunc:vv};function Jh(r){let{inputs:t,attrs:e,backend:o}=r,{dim:n}=e,{input:s}=t,a=s.shape.length,i=s.shape.slice(),p=n;return n<0&&(y.assert(-(a+1)<=n,()=>`Axis must be in the interval [${-(a+1)}, ${a}]`),p=a+n+1),i.splice(p,0,1),te({inputs:{x:s},backend:o,attrs:{shape:i}})}var pF={kernelName:na,backendName:"webgl",kernelFunc:Jh};var cF="return exp(x) - 1.0;",FJ=xe({opSnippet:cF,packedOpSnippet:cF,cpuKernelImpl:zR}),lF={kernelName:bn,backendName:"webgl",kernelFunc:FJ};var rm=class{constructor(t,e,o){this.variableNames=["real","imag"];let n=e[1];this.outputShape=e;let s=o?`2.0 * ${Math.PI}`:`-2.0 * ${Math.PI}`,a=o?`${n}.0`:"1.0",i;if(t==="real")i="return real * expR - imag * expI;";else if(t==="imag")i="return real * expI + imag * expR;";else throw new Error(`FFT component must be either "real" or "imag", got ${t}.`);this.userCode=`
2022-11-18 17:13:29 +01:00
const float exponentMultiplier = ${s};
float unaryOpComplex(float real, float expR, float imag, float expI) {
${i}
}
float mulMatDFT(int batch, int index) {
float indexRatio = float(index) / float(${n});
float exponentMultiplierTimesIndexRatio =
exponentMultiplier * indexRatio;
float result = 0.0;
for (int i = 0; i < ${n}; i++) {
// x = (-2|2 * PI / N) * index * i;
float x = exponentMultiplierTimesIndexRatio * float(i);
float expR = cos(x);
float expI = sin(x);
float real = getReal(batch, i);
float imag = getImag(batch, i);
result +=
unaryOpComplex(real, expR, imag, expI) / ${a};
}
return result;
}
void main() {
ivec2 coords = getOutputCoords();
setOutput(mulMatDFT(coords[0], coords[1]));
}
`}};function eg(r,t,e){let o=e.texData.get(r.dataId),n=y.sizeFromShape(r.shape),s=r.shape[r.shape.length-1],a=n/s,i=te({inputs:{x:r},backend:e,attrs:{shape:[a,s]}}),p=i.shape,u=new rm("real",p,t),c=new rm("imag",p,t),l=[{dataId:o.complexTensorInfos.real.dataId,dtype:o.complexTensorInfos.real.dtype,shape:p},{dataId:o.complexTensorInfos.imag.dataId,dtype:o.complexTensorInfos.imag.dtype,shape:p}],m=e.runWebGLProgram(u,l,"float32"),d=e.runWebGLProgram(c,l,"float32"),f=Pr({inputs:{real:m,imag:d},backend:e});e.disposeIntermediateTensorInfo(m),e.disposeIntermediateTensorInfo(d);let h=te({inputs:{x:f},backend:e,attrs:{shape:r.shape}});return e.disposeIntermediateTensorInfo(i),e.disposeIntermediateTensorInfo(f),h}function PJ(r){let{inputs:t,backend:e}=r,{input:o}=t;return eg(o,!1,e)}var mF={kernelName:Bi,backendName:"webgl",kernelFunc:PJ};var tg=class{constructor(t,e){this.outputShape=[],this.customUniforms=[{name:"value",type:"float"}],this.variableNames=["x"],this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
// Input can be obtained from uniform value.
setOutput(value);
}
`}};function bi(r){let{backend:t,attrs:e}=r,{shape:o,value:n}=e,{dtype:s}=e;if(s=s||y.inferDtype(n),s==="string"){let a=y.getArrayFromDType(s,y.sizeFromShape(o));return a.fill(n),t.makeTensorInfo(o,s,a)}else{let a=new tg(o,n),i=[[n]];return t.runWebGLProgram(a,[],s,i)}}var dF={kernelName:sa,backendName:"webgl",kernelFunc:bi};var rg=class{constructor(t){this.variableNames=["Image"],this.outputShape=[];let e=t[2];this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int x = coords[2];
2023-08-05 15:03:11 +02:00
int coordX = ${e} - x - 1;
2022-11-18 17:13:29 +01:00
float outputValue;
2023-08-05 15:03:11 +02:00
if(coordX >= 0 && coordX < ${e}) {
2022-11-18 17:13:29 +01:00
outputValue = getImage(coords[0], coords[1], coordX, coords[3]);
} else {
outputValue = getImage(coords[0], coords[1], coords[2], coords[3]);
}
setOutput(outputValue);
}
`}};var fF={kernelName:Cn,backendName:"webgl",kernelFunc:({inputs:r,backend:t})=>{let{image:e}=r,o=t,n=new rg(e.shape);return o.runWebGLProgram(n,[e],e.dtype)}};var hF="return floor(x);",OJ=xe({opSnippet:hF,packedOpSnippet:hF,cpuKernelImpl:VR}),gF={kernelName:wn,backendName:"webgl",kernelFunc:OJ};var MJ=`
2022-11-18 17:13:29 +01:00
float s = sign(a) * sign(b);
int ia = round(a);
int ib = round(b);
if (ib != 0) {
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
return float(idiv(ia, ib, s));
} else {
return NAN;
}
2023-08-05 15:03:11 +02:00
`,LJ=`
2022-11-18 17:13:29 +01:00
ivec4 ia = round(a);
ivec4 ib = round(b);
bvec4 cond = notEqual(ib, ivec4(0));
ivec4 result = ivec4(0);
vec4 s = sign(a) * sign(b);
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
if (cond[0]) {
result[0] = idiv(ia[0], ib[0], s[0]);
}
if (cond[1]) {
result[1] = idiv(ia[1], ib[1], s[1]);
}
if (cond[2]) {
result[2] = idiv(ia[2], ib[2], s[2]);
}
if (cond[3]) {
result[3] = idiv(ia[3], ib[3], s[3]);
}
return vec4(result);
`,BJ=nt({opSnippet:MJ,packedOpSnippet:LJ,dtype:"int32"}),xF={kernelName:Sn,backendName:"webgl",kernelFunc:BJ};var og=class{constructor(t){this.variableNames=["A"];let e=It(),[o,n]=t;this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
int texR = coords[0];
int texC = coords[1];
int depth = coords[2];
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${n}.0, ${o}.0);
2023-08-05 15:03:11 +02:00
vec4 values = ${e.texture2D}(A, uv);
2022-11-18 17:13:29 +01:00
float value;
if (depth == 0) {
value = values.r;
} else if (depth == 1) {
value = values.g;
} else if (depth == 2) {
value = values.b;
} else if (depth == 3) {
value = values.a;
}
setOutput(floor(value * 255.0 + 0.5));
}
`}};var ng=class{constructor(t){this.variableNames=["A"],this.packedInputs=!1,this.packedOutput=!0;let e=It(),[o,n]=t;this.outputShape=t,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
int texR = coords[0];
int texC = coords[1];
int depth = coords[2];
vec4 result = vec4(0.);
for(int row=0; row<=1; row++) {
for(int col=0; col<=1; col++) {
texC = coords[1] + row;
depth = coords[2] + col;
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${n}.0, ${o}.0);
2023-08-05 15:03:11 +02:00
vec4 values = ${e.texture2D}(A, uv);
2022-11-18 17:13:29 +01:00
float value;
if (depth == 0) {
value = values.r;
} else if (depth == 1) {
value = values.g;
} else if (depth == 2) {
value = values.b;
} else if (depth == 3) {
value = values.a;
}
result[row * 2 + col] = floor(value * 255.0 + 0.5);
}
}
2023-08-05 15:03:11 +02:00
${e.output} = result;
2022-11-18 17:13:29 +01:00
}
`}};var yF={kernelName:Eu,backendName:"webgl",kernelFunc:zJ},Gc,kv=A().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");function zJ(r){let{inputs:t,backend:e,attrs:o}=r,{pixels:n}=t,{numChannels:s}=o,a=typeof HTMLVideoElement!="undefined"&&n instanceof HTMLVideoElement,i=typeof HTMLImageElement!="undefined"&&n instanceof HTMLImageElement,[p,u]=a?[n.videoWidth,n.videoHeight]:[n.width,n.height],c=[u,p],l=[u,p,s];if(i||a){let h=A().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");(Gc==null||h!==kv)&&(kv=h,Gc=document.createElement("canvas").getContext("2d",{willReadFrequently:kv})),Gc.canvas.width=p,Gc.canvas.height=u,Gc.drawImage(n,0,0,p,u),n=Gc.canvas}let m=e.makeTensorInfo(c,"int32");e.texData.get(m.dataId).usage=mr.PIXELS,e.gpgpu.uploadPixelDataToTexture(e.getTexture(m.dataId),n);let d=A().getBool("WEBGL_PACK")?new ng(l):new og(l),f=e.runWebGLProgram(d,[m],"int32");return e.disposeData(m.dataId),f}function VJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=t,{strides:p,pad:u,dataFormat:c,dilations:l,dimRoundingMode:m,activation:d,leakyreluAlpha:f}=o,h=w.convertConv2DDataFormat(c),g=w.computeConv2DInfo(n.shape,s.shape,p,l,u,m,!1,h),x,b=[],C=a!=null,S=i!=null,k=d==="leakyrelu",_=()=>{let R=[n,s],D=(P,O)=>{if(O==="NCHW"&&P.shape.length===1&&P.shape[0]!==1){let M=te({inputs:{x:P},backend:e,attrs:{shape:[P.shape[0],1,1]}});return b.push(M),M}return P};if(C&&R.push(D(a,c)),S&&R.push(D(i,c)),k){let P=e.makeTensorInfo([],"float32",y.createScalarValue(f,"float32"));R.push(P),b.push(P)}return R};if(g.filterHeight===1&&g.filterWidth===1&&g.dilationHeight===1&&g.dilationWidth===1&&g.strideHeight===1&&g.strideWidth===1&&(g.padInfo.type==="SAME"||g.padInfo.type==="VALID"))x=Bh({x:n,filter:s,convInfo:g,backend:e,bias:a,activation:d,preluActivationWeights:i,leakyreluAlpha:f});else if(g.strideWidth<=2&&h==="channelsLast"&&A().getBool("WEBGL_EXP_CONV")){let R=d?xi(d,!0):null,D=new 
Vc(g,C,R,S,k),P=[[g.padInfo.top,g.padInfo.left],[g.strideHeight,g.strideWidth],[g.dilationHeight,g.dilationWidth],[g.inHeight,g.inWidth]],O=_();x=e.runWebGLProgram(D,O,"float32",P)}else if(A().getBool("WEBGL_CONV_IM2COL"))x=zh({x:n,filter:s,convInfo:g,backend:e,bias:a,activation:d,preluActivationWeights:i,leakyreluAlpha:f});else{let R=d?xi(d,!1):null,D=new zc(g,C,R,S,k),P=_();x=e.runWebGLProgram(D,P,"float32")}let E=te({inputs:{x},backend:e,attrs:{shape:g.outShape}});return b.push(x),b.forEach(R=>e.disposeIntermediateTensorInfo(R)),E}var bF={kernelName:Io,backendName:"webgl",kernelFunc:VJ};function WJ(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=t,{strides:p,pad:u,dilations:c,dimRoundingMode:l,activation:m,leakyreluAlpha:d}=o,f=[],h=c;h==null&&(h=[1,1]),y.assert(w.eitherStridesOrDilationsAreOne(p,h),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${p} and dilations '${h}'`);let g=w.computeConv2DInfo(n.shape,s.shape,p,h,u,l,!0),x=A().getBool("WEBGL_PACK_DEPTHWISECONV")&&g.strideWidth<=2&&g.outChannels/g.inChannels===1,b=m?xi(m,x):null,C=[n,s],S=a!=null,k=i!=null,_=m==="leakyrelu";if(S&&C.push(a),k&&C.push(i),_){let P=e.makeTensorInfo([],"float32",y.createScalarValue(d,"float32"));C.push(P),f.push(P)}let E;x?E=new Uc(g,S,b,k,_):E=new Wc(g,S,b,k,_);let R=[[g.padInfo.top,g.padInfo.left],[g.strideHeight,g.strideWidth],[g.dilationHeight,g.dilationWidth],[g.inHeight,g.inWidth]],D=e.runWebGLProgram(E,C,"float32",R);return f.forEach(P=>e.disposeIntermediateTensorInfo(P)),D}var CF={kernelName:vo,backendName:"webgl",kernelFunc:WJ};var sg=class{constructor(t,e,o,n){this.sliceDim=t,this.strides=e,this.paramsShape=n,this.variableNames=["x","indices"],this.outputShape=o;let s=Re(o.length),a=`
2022-11-18 17:13:29 +01:00
int index;`;for(let i=0;i<this.sliceDim;i++)a+=`
index = round(getIndices(coords[0], ${i}));
out_of_bounds = out_of_bounds || index < 0;
out_of_bounds = out_of_bounds || index >= ${this.paramsShape[i]};
flattenIndex += index * ${this.strides[i]};`;this.userCode=`
void main() {
${s} coords = getOutputCoords();
int flattenIndex = 0;
bool out_of_bounds = false;
${a}
setOutput(out_of_bounds ? 0.0 : getX(flattenIndex, coords[1]));
}
`}};function UJ(r){let{inputs:t,backend:e}=r,{params:o,indices:n}=t,s=n.shape,a=s[s.length-1],i=y.sizeFromShape(o.shape),[p,u,c,l]=w.prepareAndValidate(o,n),m=te({inputs:{x:n},backend:e,attrs:{shape:[u,a]}}),d=te({inputs:{x:o},backend:e,attrs:{shape:[y.sizeFromShape(o.shape)/c,c]}});if(e.shouldExecuteOnCPU([o,n])||o.dtype==="string"){let x=e.readSync(n.dataId),b=e.bufferSync(o),C=WR(x,b,o.dtype,u,a,c,l,o.shape,i);return e.makeTensorInfo(p,o.dtype,C.values)}let f=new sg(a,l,[u,c],o.shape),h=e.runWebGLProgram(f,[d,m],d.dtype),g=te({inputs:{x:h},backend:e,attrs:{shape:p}});return e.disposeIntermediateTensorInfo(m),e.disposeIntermediateTensorInfo(d),e.disposeIntermediateTensorInfo(h),g}var wF={kernelName:vn,backendName:"webgl",kernelFunc:UJ};var ag=class{constructor(t,e){this.variableNames=["A","indices"],this.outputShape=e,this.rank=e.length;let o=Re(this.rank),n=GJ(t,2);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o} resRC = getOutputCoords();
int index = int(getIndices(resRC.x, resRC.z));
2023-08-05 15:03:11 +02:00
float inBounds = (index >= 0) && (index < ${t[2]}) ? 1.0 : 0.0;
2022-11-18 17:13:29 +01:00
setOutput(inBounds * getA(${n}));
}
`}};function GJ(r,t){let e=["resRC.x","resRC.y","resRC.z","resRC.w"],o=[];for(let n=0;n<r.length;n++)n===2?o.push("index"):o.push(`${e[n]}`);return o.join()}function Nv(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,indices:s}=t,{axis:a,batchDims:i}=o,p=y.parseAxisParam(a,n.shape)[0];if(A().get("DEBUG")){let b=e.readSync(s.dataId),C=n.shape[p];for(let S=0;S<b.length;++S){let k=b[S];y.assert(k<=C-1&&k>=0,()=>`GatherV2: the index value ${k} is not in [0, ${C-1}]`)}}let u=w.segment_util.collectGatherOpShapeInfo(n,s,p,i),c=y.sizeFromShape(s.shape),l=[],m=te({inputs:{x:n},backend:e,attrs:{shape:[u.batchSize,u.outerSize,u.dimSize,u.sliceSize]}}),d=te({inputs:{x:s},backend:e,attrs:{shape:[u.batchSize,c/u.batchSize]}});l.push(m),l.push(d);let f=[u.batchSize,u.outerSize,c/u.batchSize,u.sliceSize];if(e.shouldExecuteOnCPU([n,s])||n.dtype==="string"){let b=e.bufferSync(d),C=e.bufferSync(m),S=UR(C,b,f);return l.forEach(k=>e.disposeIntermediateTensorInfo(k)),e.makeTensorInfo(u.outputShape,S.dtype,S.values)}let h=new ag(m.shape,f),g=e.runWebGLProgram(h,[m,d],m.dtype);l.push(g);let x=te({inputs:{x:g},backend:e,attrs:{shape:u.outputShape}});return l.forEach(b=>e.disposeIntermediateTensorInfo(b)),x}var SF={kernelName:aa,backendName:"webgl",kernelFunc:Nv};var HJ="return float(a > b);",KJ=`
2022-11-18 17:13:29 +01:00
return vec4(greaterThan(a, b));
`,qJ=nt({opSnippet:HJ,packedOpSnippet:KJ,cpuKernelImpl:GR,dtype:"bool"}),IF={kernelName:kn,backendName:"webgl",kernelFunc:qJ};var jJ="return float(a >= b);",XJ=`
2022-11-18 17:13:29 +01:00
return vec4(greaterThanEqual(a, b));
`,YJ=nt({opSnippet:jJ,packedOpSnippet:XJ,dtype:"bool",cpuKernelImpl:HR}),vF={kernelName:Nn,backendName:"webgl",kernelFunc:YJ};function QJ(r){let{inputs:t,backend:e}=r,{input:o}=t;return eg(o,!0,e)}var kF={kernelName:zi,backendName:"webgl",kernelFunc:QJ};var ZJ="return float(!isnan(x) && !isinf(x));",JJ=xe({opSnippet:ZJ,dtype:"bool"}),NF={kernelName:Tn,backendName:"webgl",kernelFunc:JJ};var eee="return float(isinf(x));",tee=xe({opSnippet:eee,dtype:"bool"}),TF={kernelName:_n,backendName:"webgl",kernelFunc:tee};var ree="return float(isnan(x));",oee=xe({opSnippet:ree,dtype:"bool"}),_F={kernelName:$n,backendName:"webgl",kernelFunc:oee};var nee="return float(a < b);",see=`
2022-11-18 17:13:29 +01:00
return vec4(lessThan(a, b));
`,aee=nt({opSnippet:nee,packedOpSnippet:see,cpuKernelImpl:KR,dtype:"bool"}),$F={kernelName:Rn,backendName:"webgl",kernelFunc:aee};var iee="return float(a <= b);",uee=`
2022-11-18 17:13:29 +01:00
return vec4(lessThanEqual(a, b));
`,pee=nt({opSnippet:iee,packedOpSnippet:uee,cpuKernelImpl:qR,dtype:"bool"}),EF={kernelName:Dn,backendName:"webgl",kernelFunc:pee};function cee(r){let{backend:t,attrs:e}=r,{start:o,stop:n,num:s}=e,a=jR(o,n,s);return t.makeTensorInfo([a.length],"float32",a)}var RF={kernelName:An,backendName:"webgl",kernelFunc:cee};var lee=Fo+`
2022-11-18 17:13:29 +01:00
return x < 0.0 ? 0./0. : log(x);
2023-08-05 15:03:11 +02:00
`,mee=`
2022-11-18 17:13:29 +01:00
vec4 result = log(x);
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : (x.r < 0.0 ? 0./0. : result.r);
result.g = isNaN.g ? x.g : (x.g < 0.0 ? 0./0. : result.g);
result.b = isNaN.b ? x.b : (x.b < 0.0 ? 0./0. : result.b);
result.a = isNaN.a ? x.a : (x.a < 0.0 ? 0./0. : result.a);
return result;
`,dee=xe({opSnippet:lee,packedOpSnippet:mee,cpuKernelImpl:XR}),DF={kernelName:Fn,backendName:"webgl",kernelFunc:dee};var fee=Fo+`
2022-11-18 17:13:29 +01:00
return log(1.0 + x);
`,hee=xe({opSnippet:fee}),AF={kernelName:Pn,backendName:"webgl",kernelFunc:hee};var gee="return float(a >= 1.0 && b >= 1.0);",xee=`
2022-11-18 17:13:29 +01:00
return vec4(
vec4(greaterThanEqual(a, vec4(1.0))) *
vec4(greaterThanEqual(b, vec4(1.0))));
`,yee=nt({opSnippet:gee,packedOpSnippet:xee,dtype:"bool"}),FF={kernelName:On,backendName:"webgl",kernelFunc:yee};var bee="return float(!(x >= 1.0));",Cee=xe({opSnippet:bee}),PF={kernelName:Mn,backendName:"webgl",kernelFunc:Cee};var wee="return float(a >= 1.0 || b >= 1.0);",See=`
2022-11-18 17:13:29 +01:00
return min(
vec4(greaterThanEqual(a, vec4(1.0))) +
vec4(greaterThanEqual(b, vec4(1.0))),
vec4(1.0));
`,Iee=nt({opSnippet:wee,packedOpSnippet:See,dtype:"bool"}),OF={kernelName:Ln,backendName:"webgl",kernelFunc:Iee};var ig=class{constructor(t,e,o,n,s){this.variableNames=["x"],this.outputShape=[];let a=e,i=t[3]-1;this.outputShape=t;let p,u=`float(${o}) + float(${n}) * sum`;s===.5?p=`inversesqrt(${u})`:s===1?p=`1.0/(${u})`:p=`exp(log(${u}) * float(-${s}));`,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int r = coords[1];
int c = coords[2];
int d = coords[3];
float x = getX(b, r, c, d);
float sum = 0.0;
for (int j = -${a}; j <= ${a}; j++) {
int idx = d + j;
if (idx >= 0 && idx <= ${i}) {
float z = getX(b, r, c, idx);
sum += z * z;
}
}
float val = x * ${p};
setOutput(val);
}
`}};var ug=class{constructor(t,e,o,n,s){this.variableNames=["x"],this.outputShape=[],this.packedInputs=!0,this.packedOutput=!0;let a=e,i=t[3]-1;this.outputShape=t;let p,u=`float(${o}) + float(${n}) * sum`;s===.5?p=`inversesqrt(${u})`:s===1?p=`1.0/(${u})`:p=`exp(log(${u}) * float(-${s}));`,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords.x;
int r = coords.y;
int c = coords.z;
int d = coords.w;
bool hasNextCol = d < ${this.outputShape[3]};
bool hasNextRow = c < ${this.outputShape[2]};
vec4 sum = vec4(0.);
vec4 xFragAtOutputCoords = getX(b, r, c, d);
vec4 xAtOutputCoords = vec4(
getChannel(xFragAtOutputCoords, vec2(c, d)),
hasNextCol ?
getChannel(xFragAtOutputCoords, vec2(c, d + 1)) : 0.0,
hasNextRow ?
getChannel(xFragAtOutputCoords , vec2(c + 1, d)) : 0.0,
(hasNextRow && hasNextCol) ?
getChannel(xFragAtOutputCoords, vec2(c + 1, d + 1)) : 0.0
);
int firstChannel = d - ${a};
vec2 cache = vec2(0.);
if(firstChannel >= 0){
vec4 firstChannelFrag = getX(b, r, c, firstChannel);
cache.x = getChannel(firstChannelFrag, vec2(c, firstChannel));
if(hasNextRow){
cache.y = getChannel(firstChannelFrag, vec2(c + 1, firstChannel));
}
}
ivec2 depth = ivec2(d, d + 1);
for (int j = - ${a}; j <= ${a}; j++) {
ivec2 idx = depth + j;
bvec2 aboveLowerBound = greaterThanEqual(idx, ivec2(0));
bvec2 belowUpperBound = lessThanEqual(idx, ivec2(${i}));
bool depthInRange = aboveLowerBound.x && belowUpperBound.x;
bool depthPlusOneInRange = aboveLowerBound.y && belowUpperBound.y;
if(depthInRange || depthPlusOneInRange){
vec4 z = vec4(0.);
vec4 xFragAtCurrentDepth;
z.xz = cache.xy;
if(depthPlusOneInRange && hasNextCol){
xFragAtCurrentDepth = idx.y != d ?
getX(b, r, c, idx.y) : xFragAtOutputCoords;
z.y = getChannel(xFragAtCurrentDepth, vec2(c, idx.y));
if(hasNextRow){
z.w = getChannel(xFragAtCurrentDepth, vec2(c + 1, idx.y));
}
}
cache.xy = z.yw;
sum += z * z;
}
}
vec4 result = xAtOutputCoords * ${p};
setOutput(result);
}
`}};var vee=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{depthRadius:s,bias:a,alpha:i,beta:p}=o,u=A().getBool("WEBGL_PACK_NORMALIZATION")?new ug(n.shape,s,a,i,p):new ig(n.shape,s,a,i,p);return e.runWebGLProgram(u,[n],n.dtype)},MF={kernelName:Bn,backendName:"webgl",kernelFunc:vee};var pg=class{constructor(t,e,o,n,s){this.variableNames=["inputImage","outputImage","dy"],this.outputShape=[],this.outputShape=t,this.depth=t[3],this.depthRadius=e,this.bias=o,this.alpha=n,this.beta=s,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int r = coords[1];
int c = coords[2];
float result = 0.0;
for (int d = 0; d < ${this.depth}; ++d) {
2023-08-05 15:03:11 +02:00
int depthBegin = int(max(0.0, float(d - ${e})));
2022-11-18 17:13:29 +01:00
int depthEnd = int(min(float(${this.depth}),
2023-08-05 15:03:11 +02:00
float(d + ${e} + 1)));
2022-11-18 17:13:29 +01:00
const int MIN_DEPTH_BEGIN = 0;
const int MAX_DEPTH_END = ${this.depth};
float norm = 0.0;
for (int k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; ++k) {
if (k < depthBegin){
continue;
}
else if (k >= depthBegin && k < depthEnd) {
norm += getInputImage(b, r, c, k) * getInputImage(b, r, c, k);
}
else {
break;
}
}
norm = float(${n}) * norm + float(${o});
for(int k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; ++k){
if (k < depthBegin){
continue;
}
else if (k >= depthBegin && k < depthEnd){
float dyi = -2.0 * float(${n})
* float(${s})
2023-01-06 19:23:06 +01:00
* getInputImage(b, r, c, k) * getOutputImage(b, r, c, d)
2022-11-18 17:13:29 +01:00
/ norm;
if (k == d) {
dyi += pow(norm, -1.0 * ${s});
}
if (k == coords[3]) {
dyi *= getDy(b, r, c, d);
result += dyi;
}
}
else {
break;
}
}
}
setOutput(result);
}
`}};var kee=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n,y:s,dy:a}=t,{depthRadius:i,bias:p,alpha:u,beta:c}=o,l=new pg(n.shape,i,p,u,c);return e.runWebGLProgram(l,[n,s,a],n.dtype)},LF={kernelName:Ya,backendName:"webgl",kernelFunc:kee};function BF(r,t,e,o){let n=y.sizeFromShape(t),a=y.sizeFromShape(r.shape)/n,i=te({inputs:{x:r},attrs:{shape:[a,n]},backend:o}),p=Xr(i,r.dtype,"max",o),u=te({inputs:{x:p},attrs:{shape:e},backend:o});return o.disposeIntermediateTensorInfo(i),o.disposeIntermediateTensorInfo(p),u}function Tv(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{reductionIndices:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=w.getAxesPermutation(u,i),l=c!=null,m=e.shouldExecuteOnCPU([n]),d=n;if(l){if(m){let C=e.texData.get(d.dataId).values,S=new Array(i);for(let E=0;E<S.length;E++)S[E]=n.shape[c[E]];let k=yp(C,n.shape,n.dtype,c,S);d=e.makeTensorInfo(S,n.dtype);let _=e.texData.get(d.dataId);_.values=k}else d=gu(n,c,e);u=w.getInnerMostAxes(u.length,i)}w.assertAxesAreInnerMostDims("max",u,i);let[f,h]=w.computeOutAndReduceShapes(d.shape,u),g=f;a&&(g=w.expandShapeToKeepDim(f,p));let x;if(m){let C=e.texData.get(d.dataId).values,S=YR(C,y.sizeFromShape(h),g,n.dtype);x=e.makeTensorInfo(g,n.dtype);let k=e.texData.get(x.dataId);k.values=S}else x=BF(d,h,g,e);return l&&e.disposeIntermediateTensorInfo(d),x}var zF={kernelName:zn,backendName:"webgl",kernelFunc:Tv};var Nee=Mc+`
2022-11-18 17:13:29 +01:00
return max(a, b);
2023-08-05 15:03:11 +02:00
`,Tee=`
2022-11-18 17:13:29 +01:00
vec4 result = vec4(max(a, b));
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
`+jr+`
2022-11-18 17:13:29 +01:00
return result;
`,_ee=nt({opSnippet:Nee,packedOpSnippet:Tee,cpuKernelImpl:QR}),VF={kernelName:Vn,backendName:"webgl",kernelFunc:_ee};function $ee(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t;Vs(n,"maxPool");let{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1;y.assert(w.eitherStridesOrDilationsAreOne(a,u),()=>`Error in maxPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${u}'`);let c=w.computePool2DInfo(n.shape,s,a,u,i,p);if(c.filterWidth===1&&c.filterHeight===1&&y.arraysEqual(c.inShape,c.outShape))return Dt({inputs:{x:n},backend:e});let l=new Us(c,"max",!1);return e.runWebGLProgram(l,[n],n.dtype)}var WF={kernelName:Wn,backendName:"webgl",kernelFunc:$ee};function Eee(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{filterSize:s,strides:a,pad:i,dataFormat:p,dimRoundingMode:u}=o,c=[1,1,1],l=w.computePool3DInfo(n.shape,s,a,c,i,u,p),m=new xu(l,"max",!1);return e.runWebGLProgram(m,[n],n.dtype)}var UF={kernelName:ia,backendName:"webgl",kernelFunc:Eee};var cg=class{constructor(t){this.variableNames=["dy","maxPos"],this.outputShape=t.inShape;let e=t.strideHeight,o=t.strideWidth,n=t.dilationHeight,s=t.effectiveFilterHeight,a=t.effectiveFilterWidth,i=s-1-t.padInfo.top,p=a-1-t.padInfo.left,u=s*a-1;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${i}, ${p});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 dyRCCorner = coords.yz - pads;
int dyRCorner = dyRCCorner.x;
int dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${s};
wR += ${n}) {
2023-08-05 15:03:11 +02:00
float dyR = float(dyRCorner + wR) / ${e}.0;
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
if (dyR < 0.0 || dyR >= ${t.outHeight}.0 || fract(dyR) > 0.0) {
2022-11-18 17:13:29 +01:00
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${a}; wC++) {
float dyC = float(dyCCorner + wC) / ${o}.0;
2023-08-05 15:03:11 +02:00
if (dyC < 0.0 || dyC >= ${t.outWidth}.0 ||
2022-11-18 17:13:29 +01:00
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(b, idyR, idyC, d);
int maxPosValue = ${u} - int(getMaxPos(b, idyR, idyC, d));
// Get the current value, check it against the value from the
// position matrix.
int curPosValue = wR * ${a} + wC;
float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
dotProd += dyValue * mask;
}
}
setOutput(dotProd);
}
`}},lg=class{constructor(t){this.variableNames=["dy","maxPos"],this.outputShape=t.inShape;let e=t.strideDepth,o=t.strideHeight,n=t.strideWidth,s=t.dilationDepth,a=t.dilationHeight,i=t.dilationWidth,p=t.effectiveFilterDepth,u=t.effectiveFilterHeight,c=t.effectiveFilterWidth,l=p-1-t.padInfo.front,m=u-1-t.padInfo.top,d=c-1-t.padInfo.left,f=p*u*c-1;this.userCode=`
// Gradient pass for 3D max pooling. The program reads dy and the recorded
// max positions and writes one dx element per output texel.
const ivec3 pads = ivec3(${l}, ${m}, ${d});

void main() {
  ivec5 coords = getOutputCoords();
  int batch = coords.x;
  int ch = coords.u;
  ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
  int dyDCorner = dyCorner.x;
  int dyRCorner = dyCorner.y;
  int dyCCorner = dyCorner.z;
  // Convolve dy(?, ?, ?, ch) with pos mask(:, :, :, d) to get
  // dx(xD, xR, xC, ch).
  // ? = to be determined. : = across all values in that axis.
  float dotProd = 0.0;
  for (int wD = 0; wD < ${p};
      wD += ${s}) {
    float dyD = float(dyDCorner + wD) / ${e}.0;
    // Skip taps outside dy, and taps that land between strided dy samples
    // (non-integer quotient).
    if (dyD < 0.0 || dyD >= ${t.outDepth}.0 || fract(dyD) > 0.0) {
      continue;
    }
    int idyD = int(dyD);
    for (int wR = 0; wR < ${u};
        wR += ${a}) {
      float dyR = float(dyRCorner + wR) / ${o}.0;
      if (dyR < 0.0 || dyR >= ${t.outHeight}.0 ||
          fract(dyR) > 0.0) {
        continue;
      }
      int idyR = int(dyR);
      for (int wC = 0; wC < ${c};
          wC += ${i}) {
        float dyC = float(dyCCorner + wC) / ${n}.0;
        if (dyC < 0.0 || dyC >= ${t.outWidth}.0 ||
            fract(dyC) > 0.0) {
          continue;
        }
        int idyC = int(dyC);
        float dyValue = getDy(batch, idyD, idyR, idyC, ch);
        // The stored position is subtracted from (filter volume - 1), i.e.
        // the flat index is flipped before it is compared below.
        int maxPosValue = ${f} -
            int(getMaxPos(batch, idyD, idyR, idyC, ch));
        // Get the current value, check it against the value from the
        // position matrix.
        int curPosValue =
            wD * ${u} * ${c} +
            wR * ${c} + wC;
        float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
        dotProd += dyValue * mask;
      }
    }
  }
  setOutput(dotProd);
}
`}};function Ree(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s}=t,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=[1,1,1],m=w.computePool3DInfo(a.shape,i,p,l,u,c),d=new xu(m,"max",!0),f=e.runWebGLProgram(d,[a],a.dtype),h=new lg(m),g=e.runWebGLProgram(h,[n,f],a.dtype);return e.disposeIntermediateTensorInfo(f),g}var GF={kernelName:Ui,backendName:"webgl",kernelFunc:Ree};function Dee(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s,output:a}=t,i=s;Vs([s,a],"maxPoolGrad");let{filterSize:p,strides:u,pad:c,dimRoundingMode:l}=o,m=w.computePool2DInfo(i.shape,p,u,1,c,l),d=!0,f=new Us(m,"max",d),h=e.runWebGLProgram(f,[i],i.dtype),g=new cg(m),x=e.runWebGLProgram(g,[n,h],i.dtype);return e.disposeIntermediateTensorInfo(h),x}var HF={kernelName:Wi,backendName:"webgl",kernelFunc:Dee};function KF(r,t,e,o){let n=new Us(e,"max",!1),s=o.runWebGLProgram(n,[r],"float32");n=new Us(e,"max",!0,!0,t);let a=o.runWebGLProgram(n,[r],"float32");return[s,a]}var qF={kernelName:ua,backendName:"webgl",kernelFunc:({inputs:r,attrs:t,backend:e})=>{let{x:o}=r,{filterSize:n,strides:s,pad:a,includeBatchInIndex:i}=t,p=e;y.assert(o.shape.length===4,()=>`Error in maxPool: input must be rank 4 but got rank ${o.shape.length}.`);let u=[1,1];y.assert(w.eitherStridesOrDilationsAreOne(s,u),()=>`Error in maxPool: Either strides or dilations must be 1. 
Got strides ${s} and dilations '${u}'`);let c=w.computePool2DInfo(o.shape,n,s,u,a),[l,m]=KF(o,i,c,p);return[l,m]}};function jF(r,t,e,o){let n=y.sizeFromShape(t),a=y.sizeFromShape(r.shape)/n,i=te({inputs:{x:r},attrs:{shape:[a,n]},backend:o}),p=Xr(i,"float32","mean",o),u=te({inputs:{x:p},attrs:{shape:e},backend:o});return o.disposeIntermediateTensorInfo(i),o.disposeIntermediateTensorInfo(p),u}var XF={kernelName:Un,backendName:"webgl",kernelFunc:({inputs:r,attrs:t,backend:e})=>{let{x:o}=r,{keepDims:n,axis:s}=t,a=e,i=o.shape.length,p=y.parseAxisParam(s,o.shape),u=p,c=w.getAxesPermutation(u,i),l=c!=null,m=a.shouldExecuteOnCPU([o]),d=[],f=o;if(l){if(m){let S=a.texData.get(f.dataId).values,k=new Array(i);for(let R=0;R<k.length;R++)k[R]=o.shape[c[R]];let _=yp(S,o.shape,o.dtype,c,k);f=a.makeTensorInfo(k,o.dtype);let E=a.texData.get(f.dataId);E.values=_}else f=gu(o,c,a);d.push(f),u=w.getInnerMostAxes(u.length,i)}w.assertAxesAreInnerMostDims("sum",u,i);let[h,g]=w.computeOutAndReduceShapes(f.shape,u),x=h;n&&(x=w.expandShapeToKeepDim(h,p));let b=jF(f,g,x,a);for(let C of d)a.disposeIntermediateTensorInfo(C);return b}};function Aee(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=w.getAxesPermutation(u,i),l=n;c!=null&&(l=bt({inputs:{x:n},backend:e,attrs:{perm:c}}),u=w.getInnerMostAxes(u.length,n.shape.length)),w.assertAxesAreInnerMostDims("min",u,i);let[m,d]=w.computeOutAndReduceShapes(l.shape,u),f=y.sizeFromShape(d),h=te({inputs:{x:l},backend:e,attrs:{shape:[-1,f]}}),g=Xr(h,h.dtype,"min",e),x;if(a){let b=w.expandShapeToKeepDim(m,p);x=te({inputs:{x:g},backend:e,attrs:{shape:b}})}else x=te({inputs:{x:g},backend:e,attrs:{shape:m}});return e.disposeIntermediateTensorInfo(h),e.disposeIntermediateTensorInfo(g),c!=null&&e.disposeIntermediateTensorInfo(l),x}var YF={kernelName:Gn,backendName:"webgl",kernelFunc:Aee};var Fee=Mc+`
// Unpacked op body: minimum of the two scalar operands.
return min(a, b);
`,Pee=`
// Packed op body: minimum over four lanes at once.
vec4 result = vec4(min(a, b));
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
// A lane is flagged when either operand is NaN in that lane.
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
`+jr+`
return result;
`,Oee=nt({opSnippet:Fee,packedOpSnippet:Pee,cpuKernelImpl:ZR}),QF={kernelName:Hn,backendName:"webgl",kernelFunc:Oee};var mg=class{constructor(t,e,o){this.variableNames=["x"],this.outputShape=e.map((c,l)=>c[0]+t[l]+c[1]);let n=t.length,s=Re(n),a=e.map(c=>c[0]).join(","),i=e.map((c,l)=>c[0]+t[l]).join(","),p=["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,n),u=o==="reflect"?0:1;if(n===1){this.userCode=`
// Rank-1 mirror padding.
// start: first output index that maps to input data (the leading padding).
// end:   leading padding plus the input length (one past the last data index).
int start = ${a};
int end = ${i};

void main() {
  int outC = getOutputCoords();
  // Fold coordinates that lie in the padding back into [start, end).
  // The interpolated offset is 0 for reflect mode and 1 otherwise.
  if (outC < start) {
    outC = start * 2 - outC - ${u};
  } else if(outC >= end) {
    outC = (end - 1) * 2 - outC + ${u};
  }
  setOutput(getX(outC - start));
}
`;return}this.userCode=`
// Mirror padding for rank > 1.
// start holds the leading padding per axis; end holds leading padding plus
// the input extent per axis.
${s} start = ${s}(${a});
${s} end = ${s}(${i});
void main() {
${s} outC = getOutputCoords();
// Fold every axis that lies in the padding back into [start, end).
// The interpolated offset is 0 for reflect mode and 1 otherwise.
for (int i = 0; i < ${n}; i++) {
if (outC[i] < start[i]) {
outC[i] = start[i] * 2 - outC[i] - ${u};
} else if(outC[i] >= end[i]) {
outC[i] = (end[i] - 1) * 2 - outC[i] + ${u};
}
}
// Translate from output space to input space before sampling.
${s} coords = outC - start;
setOutput(getX(${p}));
}
`}};var dg=class{constructor(t,e,o){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=e.map((f,h)=>f[0]+t[h]+f[1]);let n=t.length,s=Re(n),a=e.map(f=>f[0]).join(","),i=e.map((f,h)=>f[0]+t[h]).join(","),p=Rt("rc",n),u=Rt("source",n),c=`${p[n-1]} < ${this.outputShape[n-1]}`,l=n===1?"source":`vec2(${u.slice(-2).join()})`,m=o==="reflect"?0:1,d="";if(n===1){let f=`
// Rank-1 source lookup: fold rc back into [start, end), then shift it
// into input space.
${s} source = rc;
if (source < start) {
  source = start * 2 - source - ${m};
} else if (source >= end) {
  source = (end - 1) * 2 - source + ${m};
}
source -= start;
`;d=`
// Rank-1 packed body: fill lane 0, then lane 1 when the next element is
// still inside the output.
${s} rc = outputLoc;
${f}
result[0] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
  ${f}
  result[1] = getChannel(getX(${u.join()}), ${l});
}
`}else{let f=`
// Rank-N source lookup, branch free: lt / gte / orig are 0-or-1 masks per
// axis that select which of the three candidate coordinates is kept.
${s} source = rc;
${s} lt = ${s}(lessThan(source, start));
${s} gte = ${s}(greaterThanEqual(source, end));
${s} orig = 1 - (lt + gte);
source = orig * source +
    lt * (start * 2 - source - ${m}) +
    gte * ((end - 1) * 2 - source + ${m});
source -= start;
`;d=`
// Rank-N packed body: fills the four lanes of the 2x2 output cell.
// Lanes 1..3 are only written while the neighbouring column / row is still
// inside the output shape.
${s} rc = outputLoc;
${f}
result[0] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
  ${f}
  result[1] = getChannel(getX(${u.join()}), ${l});
}
rc = outputLoc;
${p[n-2]} += 1;
if(${p[n-2]} < ${this.outputShape[n-2]}) {
  ${f}
  result[2] = getChannel(getX(${u.join()}), ${l});
  ${p[n-1]} += 1;
  if(${c}) {
    ${f}
    result[3] = getChannel(getX(${u.join()}), ${l});
  }
}
`}this.userCode=`
// Packed mirror padding entry point. start / end bound the region of the
// output that maps directly onto input data.
const ${s} start = ${s}(${a});
const ${s} end = ${s}(${i});

void main() {
  ${s} outputLoc = getOutputCoords();
  vec4 result = vec4(0.);
  ${d}
  setOutput(result);
}
`}};var Mee=({inputs:r,backend:t,attrs:e})=>{let{x:o}=r,{paddings:n,mode:s}=e,a=A().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new dg(o.shape,n,s):new mg(o.shape,n,s);return t.runWebGLProgram(a,[o],o.dtype)},ZF={kernelName:Kn,backendName:"webgl",kernelFunc:Mee};var Lee=`if (b == 0.0) return NAN;
return mod(a, b);`,Bee=`
// Packed op body: modulo over four lanes at once.
vec4 result = mod(a, b);
// Mark the lanes whose divisor is zero.
bvec4 isNaN = equal(b, vec4(0.0));
`+jr+`
return result;
`,zee=nt({opSnippet:Lee,packedOpSnippet:Bee}),JF={kernelName:qn,backendName:"webgl",kernelFunc:zee};var fg=class{constructor(t,e,o){this.variableNames=["probs"],this.customUniforms=[{name:"seed",type:"float"}],this.outputShape=[t,o],this.userCode=`
// Draws one sample per output texel from the per-batch probabilities by
// walking the cumulative distribution until it exceeds a random draw.
void main() {
  ivec2 coords = getOutputCoords();
  int batch = coords[0];
  float r = random(seed);
  float cdf = 0.0;
  for (int i = 0; i < ${e-1}; i++) {
    cdf += getProbs(batch, i);
    if (r < cdf) {
      setOutput(float(i));
      return;
    }
  }
  // If no other event happened, last event happened.
  setOutput(float(${e-1}));
}
`}};var Vee=`
// Unpacked division: equal operands yield exactly 1.0.
if (a == b) {
  return 1.0;
};
return a / b;`,Wee=`
// Packed division: same rule as the unpacked form, applied per lane.
// vec4 one = vec4(equal(a, b));
// return one + (vec4(1.0) - one) * a / b;
vec4 result = a / b;
if(a.x == b.x) {
  result.x = 1.;
}
if(a.y == b.y) {
  result.y = 1.;
}
if(a.z == b.z) {
  result.z = 1.;
}
if(a.w == b.w) {
  result.w = 1.;
}
return result;
`,_v=nt({opSnippet:Vee,packedOpSnippet:Wee,checkOutOfBounds:!0}),e3={kernelName:fn,backendName:"webgl",kernelFunc:_v};var t3="return a - b;",$v=nt({opSnippet:t3,packedOpSnippet:t3,supportsComplex:!0,cpuKernelImpl:bD}),r3={kernelName:Ts,backendName:"webgl",kernelFunc:$v};function Ev(r){let{inputs:t,backend:e,attrs:o}=r,{logits:n}=t,{dim:s}=o,a=y.parseAxisParam([s],n.shape),i=Tv({inputs:{x:n},backend:e,attrs:{reductionIndices:a,keepDims:!1}}),p=w.expandShapeToKeepDim(i.shape,a),u=te({inputs:{x:i},backend:e,attrs:{shape:p}}),c=$v({inputs:{a:n,b:u},backend:e}),l=vv({inputs:{x:c},backend:e}),m=bp({inputs:{x:l},backend:e,attrs:{axis:a,keepDims:!1}}),d=te({inputs:{x:m},backend:e,attrs:{shape:p}}),f=_v({inputs:{a:l,b:d},backend:e});return e.disposeIntermediateTensorInfo(i),e.disposeIntermediateTensorInfo(u),e.disposeIntermediateTensorInfo(c),e.disposeIntermediateTensorInfo(l),e.disposeIntermediateTensorInfo(m),e.disposeIntermediateTensorInfo(d),f}var o3={kernelName:Is,backendName:"webgl",kernelFunc:Ev};function Uee(r){let{inputs:t,backend:e,attrs:o}=r,{logits:n}=t,{numSamples:s,seed:a,normalized:i}=o,p=i?n:Ev({inputs:{logits:n},backend:e,attrs:{dim:n.shape.length-1}}),u=p.shape[0],c=p.shape[1],l=new fg(u,c,s),m=[[a]],d=e.runWebGLProgram(l,[p],"int32",m);return i||e.disposeIntermediateTensorInfo(p),d}var n3={kernelName:jn,backendName:"webgl",kernelFunc:Uee};var Gee=Wt+`
// Unpacked op body: arithmetic negation.
return -x;
`,Hee=`
// Packed negation. NaN lanes are passed through from the input unchanged.
vec4 result = -x;
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`;function Kee(r){let{inputs:t,backend:e}=r,{x:o}=t;if(e.shouldExecuteOnCPU([o])){let s=e.texData.get(o.dataId),[a,i]=eD(s.values,o.shape,o.dtype);return e.makeTensorInfo(i,o.dtype,a)}let n;return A().getBool("WEBGL_PACK_UNARY_OPERATIONS")?n=new Ar(o.shape,Hee):n=new tr(o.shape,Gee),e.runWebGLProgram(n,[o],o.dtype)}var s3={kernelName:pa,backendName:"webgl",kernelFunc:Kee};var qee=Vt.nonMaxSuppressionV3Impl;function jee(r){w.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");let{inputs:t,backend:e,attrs:o}=r,{boxes:n,scores:s}=t,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p}=o,u=e.readSync(n.dataId),c=e.readSync(s.dataId),{selectedIndices:l}=qee(u,c,a,i,p);return e.makeTensorInfo([l.length],"int32",new Int32Array(l))}var a3={kernelName:Qn,backendName:"webgl",kernelFunc:jee};var Xee=Vt.nonMaxSuppressionV4Impl;function Yee(r){w.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");let{inputs:t,backend:e,attrs:o}=r,{boxes:n,scores:s}=t,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p,padToMaxOutputSize:u}=o,c=e.readSync(n.dataId),l=e.readSync(s.dataId),{selectedIndices:m,validOutputs:d}=Xee(c,l,a,i,p,u);return[e.makeTensorInfo([m.length],"int32",new Int32Array(m)),e.makeTensorInfo([],"int32",new Int32Array([d]))]}var i3={kernelName:Qa,backendName:"webgl",kernelFunc:Yee};var Qee=Vt.nonMaxSuppressionV5Impl;function Zee(r){w.warn("tf.nonMaxSuppression() in webgl locks the UI thread. 
Call tf.nonMaxSuppressionAsync() instead");let{inputs:t,backend:e,attrs:o}=r,{boxes:n,scores:s}=t,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p,softNmsSigma:u}=o,c=e.readSync(n.dataId),l=e.readSync(s.dataId),m=a,d=i,f=p,h=u,{selectedIndices:g,selectedScores:x}=Qee(c,l,m,d,f,h);return[e.makeTensorInfo([g.length],"int32",new Int32Array(g)),e.makeTensorInfo([x.length],"float32",new Float32Array(x))]}var u3={kernelName:Zn,backendName:"webgl",kernelFunc:Zee};var hg=class{constructor(t,e,o,n){this.variableNames=["indices"],this.outputShape=[t,e],this.userCode=`
// One-hot encoding: row coords.x of the output carries the on value in the
// column equal to the index read for that row, and the off value elsewhere.
void main() {
  ivec2 coords = getOutputCoords();
  int index = round(getIndices(coords.x));
  // mix() returns its second argument when the blend factor is 1.0.
  setOutput(mix(float(${n}), float(${o}),
                float(index == coords.y)));
}
`}};var Jee=r=>{let{inputs:t,backend:e,attrs:o}=r,{indices:n}=t,{dtype:s,depth:a,onValue:i,offValue:p}=o,u=y.sizeFromShape(n.shape),c=new hg(u,a,i,p),l=te({inputs:{x:n},backend:e,attrs:{shape:[u]}}),m=e.runWebGLProgram(c,[l],s);e.disposeIntermediateTensorInfo(l);let d=[...n.shape,a],f=te({inputs:{x:m},backend:e,attrs:{shape:d}});return e.disposeIntermediateTensorInfo(m),f},p3={kernelName:Jn,backendName:"webgl",kernelFunc:Jee};function om(r){let{inputs:t,backend:e}=r,{x:o}=t;if(o.dtype==="complex64"){let n=yi({inputs:{input:o},backend:e}),s=om({inputs:{x:n},backend:e}),a=wp({inputs:{input:o},backend:e}),i=om({inputs:{x:a},backend:e}),p=Pr({inputs:{real:s,imag:i},backend:e});return e.disposeIntermediateTensorInfo(n),e.disposeIntermediateTensorInfo(s),e.disposeIntermediateTensorInfo(a),e.disposeIntermediateTensorInfo(i),p}else return bi({attrs:{shape:o.shape,dtype:o.dtype,value:o.dtype==="string"?"":0},backend:e})}var c3={kernelName:Sa,backendName:"webgl",kernelFunc:om};function l3(r){let{inputs:t,backend:e}=r,{x:o}=t;if(o.dtype==="string")throw new Error("onesLike is not supported under string dtype");if(o.dtype==="complex64"){let n=yi({inputs:{input:o},backend:e}),s=l3({inputs:{x:n},backend:e}),a=wp({inputs:{input:o},backend:e}),i=om({inputs:{x:a},backend:e}),p=Pr({inputs:{real:s,imag:i},backend:e});return e.disposeIntermediateTensorInfo(n),e.disposeIntermediateTensorInfo(s),e.disposeIntermediateTensorInfo(a),e.disposeIntermediateTensorInfo(i),p}else return bi({attrs:{shape:o.shape,dtype:o.dtype,value:1},backend:e})}var m3={kernelName:ca,backendName:"webgl",kernelFunc:l3};function ete(r){let{inputs:t,backend:e,attrs:o}=r,{axis:n}=o;if(t.length===1)return Jh({inputs:{input:t[0]},backend:e,attrs:{dim:n}});let s=t[0].shape,a=t[0].dtype;t.forEach(c=>{y.assertShapesMatch(s,c.shape,"All tensors passed to stack must have matching shapes"),y.assert(a===c.dtype,()=>"All tensors passed to stack must have matching dtypes")});let i=[],p=t.map(c=>{let 
l=Jh({inputs:{input:c},backend:e,attrs:{dim:n}});return i.push(l),l}),u=Iv({inputs:p,backend:e,attrs:{axis:n}});return i.forEach(c=>e.disposeIntermediateTensorInfo(c)),u}var d3={kernelName:la,backendName:"webgl",kernelFunc:ete};var gg=class{constructor(t,e,o){this.variableNames=["x"],this.customUniforms=[{name:"value",type:"float"}],this.outputShape=e.map((u,c)=>u[0]+t[c]+u[1]);let n=t.length,s=Re(n),a=e.map(u=>u[0]).join(","),i=e.map((u,c)=>u[0]+t[c]).join(","),p=["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,n);if(n===1){this.userCode=`
// Rank-1 constant padding.
// start: leading padding; end: leading padding plus the input length.
int start = ${a};
int end = ${i};

void main() {
  int outC = getOutputCoords();
  if (outC < start || outC >= end) {
    // Inside the padding: emit the fill value uniform.
    setOutput(value);
  } else {
    setOutput(getX(outC - start));
  }
}
`;return}this.userCode=`
// Constant padding for rank > 1.
// start holds the leading padding per axis; end holds leading padding plus
// the input extent per axis.
${s} start = ${s}(${a});
${s} end = ${s}(${i});
void main() {
${s} outC = getOutputCoords();
// Any axis outside [start, end) means the texel lies in the padding.
if (any(lessThan(outC, start)) || any(greaterThanEqual(outC, end))) {
setOutput(value);
} else {
// Translate from output space to input space before sampling.
${s} coords = outC - start;
setOutput(getX(${p}));
}
}
`}};var xg=class{constructor(t,e,o){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"value",type:"float"}],this.outputShape=e.map((h,g)=>h[0]+t[g]+h[1]);let n=t.length,s=Re(n),a=e.map(h=>h[0]).join(","),i=e.map((h,g)=>h[0]+t[g]).join(","),p=Rt("rc",n),u=Rt("source",n),c=`${p[n-1]} < ${this.outputShape[n-1]}`,l=n===1?"source":`vec2(${u.slice(-2).join()})`,m=[`${s} rc = outputLoc;`,`${p[n-1]} += 1;
// Only visit the next column while it is inside the output.
if(${c}) {
`,n===1?"":`}
rc = outputLoc;
${p[n-2]} += 1;
if(${p[n-2]} < ${this.outputShape[n-2]}) {`,n===1?"":` ${p[n-1]} += 1;
if(${c}) {`],d=n===1?"rc < start || rc >= end":"any(lessThan(rc, start)) || any(greaterThanEqual(rc, end))",f="";for(let h=0,g=n===1?2:4;h<g;h++)f+=`
// Per-lane body: step rc to this lane, then write either the fill value
// (lane lies in the padding) or the matching input element.
${m[h]}
if (${d}) {
  result[${h}] = float(value);
} else {
  ${s} source = rc - start;
  result[${h}] = getChannel(getX(${u.join()}), ${l});
}
`;f+=n===1?"} ":"}}",this.userCode=`
// Packed constant padding entry point. start / end bound the region of the
// output that maps directly onto input data.
const ${s} start = ${s}(${a});
const ${s} end = ${s}(${i});

void main() {
  ${s} outputLoc = getOutputCoords();
  vec4 result = vec4(0.);
  ${f}
  setOutput(result);
}
`}};var Rv=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{paddings:s,constantValue:a}=o;if(y.sizeFromShape(n.shape)===0){let u=s.map((c,l)=>c[0]+n.shape[l]+c[1]);return bi({backend:e,attrs:{shape:u,value:a,dtype:n.dtype}})}let i=A().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new xg(n.shape,s,a):new gg(n.shape,s,a),p=[[a]];return e.runWebGLProgram(i,[n],n.dtype,p)},f3={kernelName:es,backendName:"webgl",kernelFunc:Rv};var tte=`
// Unpacked power op.
// A negative base with a fractional exponent has no real result.
if(a < 0.0 && floor(b) < b){
  return NAN;
}
// Anything raised to the power zero is 1.
if (b == 0.0) {
  return 1.0;
}
// pow() is evaluated on the magnitude; the sign of the base is restored
// only when the exponent rounds to an odd value.
return (round(mod(b, 2.0)) != 1) ?
    pow(abs(a), b) : sign(a) * pow(abs(a), b);
`,rte=`
// Packed power op: same rules as the unpacked form, applied per lane.
// isModRound1 has 1 for components with round(mod(b, 2.0)) == 1, 0 otherwise.
vec4 isModRound1 = vec4(equal(round(mod(b, 2.0)), ivec4(1)));
vec4 multiplier = sign(a) * isModRound1 + (vec4(1.0) - isModRound1);
vec4 result = multiplier * pow(abs(a), b);
// Ensure that a^0 = 1, including 0^0 = 1 as this correspond to TF and JS
bvec4 isExpZero = equal(b, vec4(0.0));
result.r = isExpZero.r ? 1.0 : result.r;
result.g = isExpZero.g ? 1.0 : result.g;
result.b = isExpZero.b ? 1.0 : result.b;
result.a = isExpZero.a ? 1.0 : result.a;
// Flag lanes with a negative base and a fractional exponent.
bvec4 isNaN1 = lessThan(a, vec4(0.0));
bvec4 isNaN2 = lessThan(floor(b), b);
bvec4 isNaN = bvec4(isNaN1.x && isNaN2.x, isNaN1.y && isNaN2.y, isNaN1.z && isNaN2.z, isNaN1.w && isNaN2.w);
`+jr+`
return result;
`,ote=nt({opSnippet:tte,packedOpSnippet:rte}),h3={kernelName:ts,backendName:"webgl",kernelFunc:ote};function nte(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o,i=n.shape.length,p=[],u=y.parseAxisParam(s,n.shape),c=u,l=w.getAxesPermutation(c,i),m=n;l!=null&&(m=bt({inputs:{x:n},backend:e,attrs:{perm:l}}),c=w.getInnerMostAxes(c.length,i),p.push(m)),w.assertAxesAreInnerMostDims("prod",c,i);let d;if(e.shouldExecuteOnCPU([m])){let f=e.texData.get(m.dataId).values,{outVals:h,outShape:g,outDtype:x}=rD(m.shape,m.dtype,f,c);d=e.makeTensorInfo(g,x,h)}else{let[f,h]=w.computeOutAndReduceShapes(m.shape,c),g=y.sizeFromShape(h),x=te({inputs:{x:m},backend:e,attrs:{shape:[-1,g]}}),b=oi(n.dtype),C=Xr(x,b,"prod",e);d=te({inputs:{x:C},backend:e,attrs:{shape:f}}),p.push(x),p.push(C)}if(a){p.push(d);let f=w.expandShapeToKeepDim(d.shape,u);d=te({inputs:{x:d},backend:e,attrs:{shape:f}})}return p.forEach(f=>e.disposeIntermediateTensorInfo(f)),d}var g3={kernelName:os,backendName:"webgl",kernelFunc:nte};function ste(r){let{inputs:t,backend:e,attrs:o}=r,{paramsNestedSplits:n,paramsDenseValues:s,indices:a}=t,{outputRaggedRank:i}=o,p=n.map(x=>e.readSync(x.dataId)),u=n.map(x=>x.shape),c=e.readSync(s.dataId),l=e.readSync(a.dataId),[m,d,f]=oD(p,u,c,s.shape,s.dtype,l,a.shape,i),h=m.map(x=>e.makeTensorInfo([x.length],"int32",x)),g=e.makeTensorInfo(f,s.dtype,d);return h.concat([g])}var x3={kernelName:Up,backendName:"webgl",kernelFunc:ste};function ate(r){let{inputs:t,backend:e}=r,{starts:o,limits:n,deltas:s}=t,a=e.readSync(o.dataId),i=e.readSync(n.dataId),p=e.readSync(s.dataId),[u,c]=nD(a,o.shape,o.dtype,i,n.shape,p,s.shape),l=e.makeTensorInfo([u.length],"int32",u),m=e.makeTensorInfo([c.length],o.dtype,c);return[l,m]}var y3={kernelName:Gp,backendName:"webgl",kernelFunc:ate};function 
ite(r){let{inputs:t,backend:e,attrs:o}=r,{shape:n,values:s,defaultValue:a,rowPartitionTensors:i}=t,{rowPartitionTypes:p}=o,u=e.readSync(n.dataId),c=e.readSync(s.dataId),l=e.readSync(a.dataId),m=i.map(g=>e.readSync(g.dataId)),d=i.map(g=>g.shape),[f,h]=sD(u,n.shape,c,s.shape,s.dtype,l,a.shape,m,d,p);return e.makeTensorInfo(f,s.dtype,h)}var b3={kernelName:Hp,backendName:"webgl",kernelFunc:ite};var Dv=r=>{let{backend:t,attrs:e}=r,{start:o,stop:n,step:s,dtype:a}=e,i=aD(o,n,s,a);return t.makeTensorInfo([i.length],a,i)},C3={kernelName:ma,backendName:"webgl",kernelFunc:Dv};var ute="return 1.0 / x;",pte=xe({opSnippet:ute}),w3={kernelName:ns,backendName:"webgl",kernelFunc:pte};var cte=Wt+`
// Unpacked op body: clamp negative inputs to zero.
return (x < 0.0) ? 0.0 : x;
`,lte=`
// Packed form: lanes below zero are multiplied by 0, the rest by 1.
vec4 result = x * vec4(greaterThanEqual(x, vec4(0.0)));
// NaN lanes are passed through from the input unchanged.
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,mte=xe({opSnippet:cte,packedOpSnippet:lte}),S3={kernelName:ss,backendName:"webgl",kernelFunc:mte};var dte=Wt+`
// Unpacked op body: clamp the input to the range [0, 6].
return (x < 0.0) ? 0.0 : min(6.0, x);
`,fte=`
// Packed form: cap each lane at 6, then zero the lanes that are negative.
vec4 result = min(x, vec4(6.)) * vec4(greaterThanEqual(x, vec4(0.0)));
// NaN lanes are passed through from the input unchanged.
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,hte=xe({opSnippet:dte,packedOpSnippet:fte}),I3={kernelName:us,backendName:"webgl",kernelFunc:hte};var yg=class{constructor(t,e,o,n,s){this.variableNames=["A"],this.outputShape=[];let[a,i,p,u]=t;this.outputShape=[a,e,o,u];let c=[n&&e>1?i-1:i,n&&o>1?p-1:p],l=[n&&e>1?e-1:e,n&&o>1?o-1:o],m;s?m="(vec2(yRC) + vec2(0.5)) * effectiveInputOverOutputRatioRC - vec2(0.5)":m="vec2(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
// Bilinear resize, unpacked: each output texel blends the four input
// texels that surround its fractional source position.
const vec2 effectiveInputOverOutputRatioRC = vec2(
    ${c[0]/l[0]},
    ${c[1]/l[1]});
const vec2 inputShapeRC = vec2(${i}.0, ${p}.0);

void main() {
  ivec4 coords = getOutputCoords();
  int b = coords[0];
  int d = coords[3];
  ivec2 yRC = coords.yz;
  // Fractional source index.
  vec2 sourceFracIndexRC = ${m};
  // Compute the four integer indices.
  ivec2 sourceFloorRC = ivec2(max(sourceFracIndexRC, vec2(0.0)));
  ivec2 sourceCeilRC = ivec2(
      min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)));
  float topLeft = getA(b, sourceFloorRC.x, sourceFloorRC.y, d);
  float bottomLeft = getA(b, sourceCeilRC.x, sourceFloorRC.y, d);
  float topRight = getA(b, sourceFloorRC.x, sourceCeilRC.y, d);
  float bottomRight = getA(b, sourceCeilRC.x, sourceCeilRC.y, d);
  vec2 fracRC = sourceFracIndexRC - vec2(sourceFloorRC);
  // Interpolate along columns first, then along rows.
  float top = topLeft + (topRight - topLeft) * fracRC.y;
  float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y;
  float newValue = top + (bottom - top) * fracRC.x;
  setOutput(newValue);
}
`}};var bg=class{constructor(t,e,o,n,s){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[];let[a,i,p,u]=t;this.outputShape=[a,e,o,u];let c=[n&&e>1?i-1:i,n&&o>1?p-1:p],l=[n&&e>1?e-1:e,n&&o>1?o-1:o],m;s?m="(vec3(yRC) + vec3(0.5)) * effectiveInputOverOutputRatioRC - vec3(0.5)":m="vec3(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
// Bilinear resize, packed: computes all four lanes of a 2x2 output cell.
// The third component of the RC vectors tracks the next column
// (coords.z + 1), which is why the column ratio and width appear twice.
const vec3 effectiveInputOverOutputRatioRC = vec3(
    ${c[0]/l[0]},
    ${c[1]/l[1]},
    ${c[1]/l[1]});
const vec3 inputShapeRC = vec3(${i}.0, ${p}.0,
                               ${p}.0);

float getAValue(int b, int r, int c, int d) {
  return getChannel(getA(b, r, c, d), vec2(c, d));
}

void main() {
  ivec4 coords = getOutputCoords();
  int b = coords[0];
  int d = coords[3];
  // Calculate values for next column in yRC.z.
  ivec3 yRC = coords.yzz + ivec3(0, 0, 1);
  // Fractional source index.
  vec3 sourceFracIndexRC = ${m};
  // Compute the four integer indices.
  ivec3 sourceFloorRC = ivec3(max(sourceFracIndexRC, vec3(0.0)));
  ivec3 sourceCeilRC = ivec3(
      min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)));
  // Should we calculate next column and row elements in 2x2 packed cell.
  bool hasNextCol = d < ${u-1};
  bool hasNextRow = coords.z < ${o-1};
  // In parallel, construct four corners for all four components in
  // packed 2x2 cell.
  vec4 topLeft = vec4(
      getAValue(b, sourceFloorRC.x, sourceFloorRC.y, d),
      hasNextCol ? getAValue(b, sourceFloorRC.x, sourceFloorRC.y, d + 1)
                 : 0.0,
      hasNextRow ? getAValue(b, sourceFloorRC.x, sourceFloorRC.z, d)
                 : 0.0,
      (hasNextRow && hasNextCol) ?
          getAValue(b, sourceFloorRC.x, sourceFloorRC.z, d + 1) : 0.0);
  vec4 bottomLeft = vec4(
      getAValue(b, sourceCeilRC.x, sourceFloorRC.y, d),
      hasNextCol ? getAValue(b, sourceCeilRC.x, sourceFloorRC.y, d + 1)
                 : 0.0,
      hasNextRow ? getAValue(b, sourceCeilRC.x, sourceFloorRC.z, d)
                 : 0.0,
      (hasNextRow && hasNextCol) ?
          getAValue(b, sourceCeilRC.x, sourceFloorRC.z, d + 1) : 0.0);
  vec4 topRight = vec4(
      getAValue(b, sourceFloorRC.x, sourceCeilRC.y, d),
      hasNextCol ? getAValue(b, sourceFloorRC.x, sourceCeilRC.y, d + 1)
                 : 0.0,
      hasNextRow ? getAValue(b, sourceFloorRC.x, sourceCeilRC.z, d)
                 : 0.0,
      (hasNextRow && hasNextCol) ?
          getAValue(b, sourceFloorRC.x, sourceCeilRC.z, d + 1) : 0.0);
  vec4 bottomRight = vec4(
      getAValue(b, sourceCeilRC.x, sourceCeilRC.y, d),
      hasNextCol ? getAValue(b, sourceCeilRC.x, sourceCeilRC.y, d + 1)
                 : 0.0,
      hasNextRow ? getAValue(b, sourceCeilRC.x, sourceCeilRC.z, d)
                 : 0.0,
      (hasNextRow && hasNextCol) ?
          getAValue(b, sourceCeilRC.x, sourceCeilRC.z, d + 1) : 0.0);
  vec3 fracRC = sourceFracIndexRC - vec3(sourceFloorRC);
  // Lanes 0-1 use the current column fraction, lanes 2-3 the next column's.
  vec4 top = mix(topLeft, topRight, fracRC.yyzz);
  vec4 bottom = mix(bottomLeft, bottomRight, fracRC.yyzz);
  vec4 newValue = mix(top, bottom, fracRC.x);
  setOutput(newValue);
}
`}};function gte(r){let{inputs:t,backend:e,attrs:o}=r,{images:n}=t,{alignCorners:s,halfPixelCenters:a,size:i}=o,[p,u]=i,c=A().getBool("WEBGL_PACK_IMAGE_OPERATIONS")?new bg(n.shape,p,u,s,a):new yg(n.shape,p,u,s,a);return e.runWebGLProgram(c,[n],"float32")}var v3={kernelName:is,backendName:"webgl",kernelFunc:gte};var Cg=class{constructor(t,e,o){this.variableNames=["dy"],this.outputShape=[],this.outputShape=e;let[,n,s]=e,[,a,i]=t,p=[o&&a>1?n-1:n,o&&i>1?s-1:s],u=[o&&a>1?a-1:a,o&&i>1?i-1:i],c=p[0]/u[0],l=p[1]/u[1],m=1/c,d=1/l,f=Math.ceil(m)*2+2,h=Math.ceil(d)*2+2;this.userCode=`
// Gradient pass for bilinear resize: each output texel is one dx element.
// It scans a window of dy and accumulates the share of every dy value whose
// four interpolation corners include this element.
void main() {
  ivec4 coords = getOutputCoords();
  int b = coords[0];
  int d = coords[3];
  int r = coords[1];
  int c = coords[2];
  float accumulator = 0.0;
  const float heightScale = float(${c});
  const float widthScale = float(${l});
  const float invHeightScale = float(${m});
  const float invWidthScale = float(${d});
  const int winHeight = int(${f});
  const int winWidth = int(${h});
  // Compute bounds for where in dy we will look
  float startRLerp = floor(float(r) * invHeightScale);
  int startDyR = int(startRLerp - float(winHeight / 2));
  float startCLerp = floor(float(c) * invWidthScale);
  int startDyC = int(startCLerp - float(winWidth / 2));
  // Loop over dy
  for (int dyROffset = 0; dyROffset < winHeight; dyROffset++) {
    int dyR = dyROffset + startDyR;
    // Guard against the window exceeding the bounds of dy
    if (dyR < 0 || dyR >= ${a}) {
      continue;
    }
    for (int dyCOffset = 0; dyCOffset < winWidth; dyCOffset++) {
      int dyC = dyCOffset + startDyC;
      // Guard against the window exceeding the bounds of dy
      if (dyC < 0 || dyC >= ${i}) {
        continue;
      }
      float dxR = float(dyR) * heightScale;
      int topDxRIndex = int(floor(dxR));
      int bottomDxRIndex = int(min(ceil(dxR), ${n-1}.0));
      float dxRLerp = dxR - float(topDxRIndex);
      float inverseDxRLerp = 1.0 - dxRLerp;
      float dxC = float(dyC) * widthScale;
      int leftDxCIndex = int(floor(dxC));
      int rightDxCIndex = int(min(ceil(dxC), ${s-1}.0));
      float dxCLerp = dxC - float(leftDxCIndex);
      float inverseDxCLerp = 1.0 - dxCLerp;
      if (r == topDxRIndex && c == leftDxCIndex) {
        // topLeft
        accumulator +=
            getDy(b, dyR, dyC, d) * inverseDxRLerp * inverseDxCLerp;
      }
      if (r == topDxRIndex && c == rightDxCIndex) {
        // topRight
        accumulator += getDy(b, dyR, dyC, d) * inverseDxRLerp * dxCLerp;
      }
      if (r == bottomDxRIndex && c == leftDxCIndex) {
        // bottomLeft
        accumulator += getDy(b, dyR, dyC, d) * dxRLerp * inverseDxCLerp;
      }
      if (r == bottomDxRIndex && c == rightDxCIndex) {
        // bottomRight
        accumulator += getDy(b, dyR, dyC, d) * dxRLerp * dxCLerp;
      }
    }
  }
  // End loop over dy
  setOutput(accumulator);
}
`}};function xte(r){let{inputs:t,backend:e,attrs:o}=r,{images:n,dy:s}=t,{alignCorners:a}=o,i=new Cg(s.shape,n.shape,a);return e.runWebGLProgram(i,[s],s.dtype)}var k3={kernelName:Ja,backendName:"webgl",kernelFunc:xte};var wg=class{constructor(t,e,o,n,s){this.variableNames=["A"],this.outputShape=[];let[a,i,p,u]=t;this.outputShape=[a,e,o,u];let c=[n&&e>1?i-1:i,n&&o>1?p-1:p],l=[n&&e>1?e-1:e,n&&o>1?o-1:o],m=n?"0.5":"0.0",d;s?d="max((vec2(yRC) + vec2(0.5)) * effectiveInputOverOutputRatioRC, vec2(0.0))":d="vec2(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
// Nearest-neighbour resize, unpacked: each output texel copies the single
// input texel closest to its fractional source position.
const vec2 effectiveInputOverOutputRatioRC = vec2(
    ${c[0]/l[0]},
    ${c[1]/l[1]});
const vec2 inputShapeRC = vec2(${i}.0, ${p}.0);

void main() {
  ivec4 coords = getOutputCoords();
  int b = coords[0];
  int d = coords[3];
  ivec2 yRC = coords.yz;
  // Fractional source index.
  vec2 sourceFracIndexRC = ${d};
  // Compute the coordinators of nearest neighbor point.
  // The added offset is 0.5 with aligned corners (round) and 0.0 otherwise
  // (floor).
  ivec2 sourceNearestRC = ivec2(
      min(inputShapeRC - 1.0, floor(sourceFracIndexRC + ${m})));
  float newValue = getA(b, sourceNearestRC.x, sourceNearestRC.y, d);
  setOutput(newValue);
}
`}};var Sg=class{constructor(t,e,o,n,s){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[];let[a,i,p,u]=t;this.outputShape=[a,e,o,u];let c=[n&&e>1?i-1:i,n&&o>1?p-1:p],l=[n&&e>1?e-1:e,n&&o>1?o-1:o],m=n?"0.5":"0.0",d;s?d="max((vec3(yRC) + vec3(0.5)) * effectiveInputOverOutputRatioRC, vec3(0.0))":d="vec3(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
// Nearest-neighbour resize, packed: computes all four lanes of a 2x2 output
// cell. The third component of the RC vectors tracks the next column
// (coords.z + 1), which is why the column ratio and width appear twice.
const vec3 effectiveInputOverOutputRatioRC = vec3(
    ${c[0]/l[0]},
    ${c[1]/l[1]},
    ${c[1]/l[1]});
const vec3 inputShapeRC = vec3(${i}.0, ${p}.0,
                               ${p}.0);

float getAValue(int b, int r, int c, int d) {
  return getChannel(getA(b, r, c, d), vec2(c, d));
}

void main() {
  ivec4 coords = getOutputCoords();
  int b = coords[0];
  int d = coords[3];
  // Calculate values for next column in yRC.z.
  ivec3 yRC = coords.yzz + ivec3(0, 0, 1);
  // Fractional source index.
  vec3 sourceFracIndexRC = ${d};
  // Compute the coordinators of nearest neighbor point.
  ivec3 sourceNearestRC = ivec3(
      min(inputShapeRC - 1.0, floor(sourceFracIndexRC + ${m})));
  // Should we calculate next column and row elements in 2x2 packed cell.
  bool hasNextCol = d < ${u-1};
  bool hasNextRow = coords.z < ${o-1};
  vec4 newValue = vec4(
      getAValue(b, sourceNearestRC.x, sourceNearestRC.y, d),
      hasNextCol ? getAValue(b, sourceNearestRC.x, sourceNearestRC.y, d + 1)
                 : 0.0,
      hasNextRow ? getAValue(b, sourceNearestRC.x, sourceNearestRC.z, d)
                 : 0.0,
      (hasNextRow && hasNextCol) ?
          getAValue(b, sourceNearestRC.x, sourceNearestRC.z, d + 1) : 0.0);
  setOutput(newValue);
}
`}};function yte(r){let{inputs:t,backend:e,attrs:o}=r,{images:n}=t,{alignCorners:s,halfPixelCenters:a,size:i}=o,[p,u]=i,c=A().getBool("WEBGL_PACK_IMAGE_OPERATIONS")?new Sg(n.shape,p,u,s,a):new wg(n.shape,p,u,s,a);return e.runWebGLProgram(c,[n],n.dtype)}var N3={kernelName:as,backendName:"webgl",kernelFunc:yte};var Ig=class{constructor(t,e,o){this.variableNames=["dy"],this.outputShape=[],this.outputShape=e;let[,n,s]=e,[,a,i]=t,p=[o&&a>1?n-1:n,o&&i>1?s-1:s],u=[o&&a>1?a-1:a,o&&i>1?i-1:i],c=p[0]/u[0],l=p[1]/u[1],m=1/c,d=1/l,f=Math.ceil(m)*2+2,h=Math.ceil(d)*2+2;this.userCode=`
// Gradient pass for nearest-neighbour resize: each output texel is one dx
// element. It scans a window of dy and sums every dy value whose nearest
// source position is this element.
void main() {
  ivec4 coords = getOutputCoords();
  int b = coords[0];
  int d = coords[3];
  int r = coords[1];
  int c = coords[2];
  float accumulator = 0.0;
  const float heightScale = float(${c});
  const float widthScale = float(${l});
  const float invHeightScale = float(${m});
  const float invWidthScale = float(${d});
  const int winHeight = int(${f});
  const int winWidth = int(${h});
  // Compute bounds for where in dy we will look
  float startRLerp = floor(float(r) * invHeightScale);
  int startDyR = int(floor(startRLerp - float(winHeight / 2)));
  float startCLerp = floor(float(c) * invWidthScale);
  int startDyC = int(floor(startCLerp - float(winWidth / 2)));
  // Loop over dy
  for (int dyROffset = 0; dyROffset < winHeight; dyROffset++) {
    int dyR = dyROffset + startDyR;
    // Guard against the window exceeding the bounds of dy
    if (dyR < 0 || dyR >= ${a}) {
      continue;
    }
    for (int dyCOffset = 0; dyCOffset < winWidth; dyCOffset++) {
      int dyC = dyCOffset + startDyC;
      // Guard against the window exceeding the bounds of dy
      if (dyC < 0 || dyC >= ${i}) {
        continue;
      }
      float sourceFracRow =
          float(${p[0]}) *
          (float(dyR) / float(${u[0]}));
      float sourceFracCol =
          float(${p[1]}) *
          (float(dyC) / float(${u[1]}));
      // The interpolated boolean selects rounding (aligned corners) or
      // flooring; the result is capped at the last valid index.
      int sourceNearestRow = int(min(
          float(int(${n}) - 1),
          ${o} ? float(round(sourceFracRow)) :
                 float(floor(sourceFracRow))));
      int sourceNearestCol = int(min(
          float(int(${s}) - 1),
          ${o} ? float(round(sourceFracCol)) :
                 float(floor(sourceFracCol))));
      if (r == sourceNearestRow && c == sourceNearestCol) {
        accumulator += getDy(b, dyR, dyC, d);
      }
    }
  }
  // End loop over dy
  setOutput(accumulator);
}
`}};function bte(r){let{inputs:t,backend:e,attrs:o}=r,{images:n,dy:s}=t,{alignCorners:a}=o,i=new Ig(s.shape,n.shape,a);return e.runWebGLProgram(i,[s],s.dtype)}var T3={kernelName:Za,backendName:"webgl",kernelFunc:bte};var vg=class{constructor(t,e){this.variableNames=["x"];let o=t.length;if(o>4)throw new Error(`WebGL backend: Reverse of rank-${o} tensor is not yet supported`);if(this.outputShape=t,o===1){this.userCode=`
// Rank-1 reverse: output element coord reads input element
// (size - coord - 1), where size is baked in by the host.
void main() {
  int coord = getOutputCoords();
  setOutput(getX(${t[0]} - coord - 1));
}
`;return}let n=i=>e.indexOf(i)!==-1&&t[i]!==1?`${t[i]} - coords[${i}] - 1`:`coords[${i}]`,s=t.map((i,p)=>n(p)).join(","),a=Re(o);this.userCode=`
// General reverse: the host builds the getX argument list, using
// (dim - coords[i] - 1) for every reversed axis whose size is not 1 and
// coords[i] for all other axes.
void main() {
  ${a} coords = getOutputCoords();
  setOutput(getX(${s}));
}
`}};var kg=class{constructor(t,e){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0;let o=t.length;if(o>4)throw new Error(`WebGL backend: Reverse of rank-${o} tensor is not yet supported`);this.outputShape=t;let n=Rt("rc",o),s=`${n[o-1]} + 1 < ${this.outputShape[o-1]}`,a=`${n[o-2]} + 1 < ${this.outputShape[o-2]}`,i=Re(o);o===1?this.userCode=`
// Packed rank-1 reverse: result.r holds the mirrored element for index rc,
// result.g the mirrored element for rc + 1 when that index is still inside
// the output (condition generated by the host).
void main(){
  int rc = getOutputCoords();
  vec4 result = vec4(0.);
  result.r = getChannel(getX(${t[0]} - rc - 1),
    ${t[0]} - rc - 1);
  if(${s}){
    result.g = getChannel(getX(${t[0]} - (rc + 1) - 1),
      ${t[0]} - (rc + 1) - 1);
  }
  setOutput(result);
}
`:this.userCode=`
// Packed reverse for rank > 1. The four vec4 components are filled from
// host-generated getChannel(getX(...)) expressions: r for rc itself, g for
// the last coordinate + 1, b for the second-to-last coordinate + 1, and a
// for both incremented.
void main() {
${i} rc = getOutputCoords();
vec4 result = vec4(0.);
result.r = ${p(n.slice())};
// Only read the neighbor along the last axis when it is inside the output.
if(${s}){
result.g = ${u(n.slice())};
}
// Only read the neighbor along the second-to-last axis when it is inside
// the output.
if(${a}) {
result.b = ${c(n.slice())};
if(${s}) {
result.a = ${l(n.slice())};
}
}
setOutput(result);
}
`;function p(f){return m(f)}function u(f){return f[o-1]="("+f[o-1]+" + 1)",m(f)}function c(f){return f[o-2]="("+f[o-2]+" + 1)",m(f)}function l(f){return f[o-1]="("+f[o-1]+" + 1)",f[o-2]="("+f[o-2]+" + 1)",m(f)}function m(f){let h=t.map((b,C)=>d(C,f)),g=h.join(","),x=h.slice(-2).join(",");return`getChannel(getX(${g}), vec2(${x}))`}function d(f,h){return e.indexOf(f)!==-1&&t[f]!==1?`${t[f]} - ${h[f]} - 1`:`${h[f]}`}}};function Cte(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{dims:s}=o,a=n.shape.length,i=y.parseAxisParam(s,n.shape);if(a===0)return Dt({inputs:{x:n},backend:e});let p=A().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new kg(n.shape,i):new vg(n.shape,i);return e.runWebGLProgram(p,[n],n.dtype)}var _3={kernelName:ps,backendName:"webgl",kernelFunc:Cte};var Ng=class{constructor(t,e){this.variableNames=["Image"],this.outputShape=[],this.customUniforms=[{name:"params",type:"vec4"}];let o=t[1],n=t[2];this.outputShape=t;let s="";typeof e=="number"?s=`float outputValue = ${e.toFixed(2)};`:s=`
// Fill value given as a list: one component per entry, indexed by the
// output channel in the statement that follows.
vec3 fill = vec3(${e.join(",")});
float outputValue = fill[coords[3]];`,this.userCode=`
// Image rotation shader entry point.
// params = (centerX, centerY, sin(radians), cos(radians)) as supplied by the
// kernel function that runs this program.
void main() {
ivec4 coords = getOutputCoords();
int x = coords[2];
int y = coords[1];
// Rotate the output position about the center to find the source position.
float coordXFloat = (float(x) - params[0]) * params[3] -
(float(y) - params[1]) * params[2];
float coordYFloat = (float(x) - params[0]) * params[2] +
(float(y) - params[1]) * params[3];
int coordX = int(round(coordXFloat + params[0]));
int coordY = int(round(coordYFloat + params[1]));
// Host-generated fragment that declares outputValue with the fill value.
${s}
// Sample the image only when the source position is inside its bounds;
// otherwise the fill value is kept.
if(coordX >= 0 && coordX < ${n} && coordY >= 0 && coordY < ${o}) {
outputValue = getImage(coords[0], coordY, coordX, coords[3]);
}
setOutput(outputValue);
}
`}};var $3={kernelName:Ds,backendName:"webgl",kernelFunc:({inputs:r,attrs:t,backend:e})=>{let{image:o}=r,{radians:n,fillValue:s,center:a}=t,i=e,p=new Ng(o.shape,s),[u,c]=w.getImageCenter(a,o.shape[1],o.shape[2]),l=[[u,c,Math.sin(n),Math.cos(n)]];return i.runWebGLProgram(p,[o],o.dtype,l)}};var wte=`
// OpenGL ES does not support round function.
// The algorithm is based on banker's rounding: exact halves go to the
// nearest even integer, everything else to the nearest integer.
float base = floor(x);
if ((x - base) < 0.5) {
  return floor(x);
} else if ((x - base) > 0.5) {
  return ceil(x);
} else {
  // Fractional part is exactly 0.5: pick whichever neighbor is even.
  if (mod(base, 2.0) == 0.0) {
    return base;
  } else {
    return base + 1.0;
  }
}
`,Ste=xe({opSnippet:wte}),E3={kernelName:cs,backendName:"webgl",kernelFunc:Ste};var Ite="return inversesqrt(x);",vte=xe({opSnippet:Ite,cpuKernelImpl:iD}),R3={kernelName:ls,backendName:"webgl",kernelFunc:vte};var yu=class{constructor(t,e,o,n,s,a,i=!0,p=!1){this.variableNames=["updates","indices","defaultValue"],this.outputShape=a;let u=Re(s.length),c=Re(a.length),l="";o===1?l="i":o===2&&(l="i, j");let m=`getIndices(${l})`,d="";n===1?d="i":n===2&&(d="i, coords[1]");let f=`getUpdates(${d})`,h="";p&&(h="coords[0], coords[1]");let g=`getDefaultValue(${h})`,x=e>1?"strides[j]":"strides";this.userCode=`
// Strides used to flatten a multi-dimensional index (values baked in by the
// host).
${u} strides = ${u}(${s});
// Scatter shader entry point: for the output row coords[0], sums every
// update whose flattened index equals that row. When no update matches,
// the default value is written instead.
void main() {
  ${c} coords = getOutputCoords();
  float sum = 0.0;
  bool found = false;
  for (int i = 0; i < ${t}; i++) {
    // Flatten the i-th index tuple into a single row index.
    int flattenedIndex = 0;
    for (int j = 0; j < ${e}; j++) {
      int index = round(${m});
      flattenedIndex += index * ${x};
    }
    if (flattenedIndex == coords[0]) {
      sum += ${f};
      found = true;
    }
  }
  // mix(a, b, t) yields a when t == 0.0 and b when t == 1.0.
  setOutput(mix(${g}, sum, float(found)));
}
`}};var Tg=class{constructor(t,e,o,n,s,a,i=!0,p=!1){this.variableNames=["updates","indices","defaultValue"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=a;let u=Re(s.length),c=Re(a.length),l="";o===1?l="i":o===2&&(l="i, j");let m=`getIndices(${l})`,d="";n===1?d="i":n===2&&(d="i, coords[1]");let f=`getUpdates(${d})`,h="";p&&(h="coords[0], coords[1]");let g=`getDefaultValue(${h})`,x=e>1?"strides[j]":"strides",b=e>1?"strides[j + 1]":"strides";this.userCode=`
// Strides used to flatten a multi-dimensional index (values baked in by the
// host).
${u} strides = ${u}(${s});
// Packed scatter shader entry point. Each loop step handles two index
// tuples at once (flattenedIndex[0] and flattenedIndex[1]); matches against
// coords[0] go into sum.xy and matches against coords[0] + 1 into sum.zw.
void main() {
  ${c} coords = getOutputCoords();
  vec4 sum = vec4(0.);
  vec4 found = vec4(0.);
  for (int i = 0; i < ${t}; i+=2) {
    ivec2 flattenedIndex = ivec2(0);
    for (int j = 0; j < ${e}; j+=2) {
      ivec4 index = round(${m});
      flattenedIndex += index.xz * ${x};
      // The second column of the packed texel is only valid when j + 1 is
      // still inside the index tuple.
      if (j + 1 < ${e}) {
        flattenedIndex += index.yw * ${b};
      }
    }
    if (flattenedIndex[0] == coords[0] || flattenedIndex[1] == coords[0] ||
        flattenedIndex[0] == coords[0] + 1 || flattenedIndex[1] == coords[0] + 1) {
      vec4 updVals = ${f};
      if (flattenedIndex[0] == coords[0]) {
        sum.xy += updVals.xy;
        found.xy = vec2(1.);
      } else if (flattenedIndex[0] == coords[0] + 1) {
        sum.zw += updVals.xy;
        found.zw = vec2(1.);
      }
      if (flattenedIndex[1] == coords[0]) {
        sum.xy += updVals.zw;
        found.xy = vec2(1.);
      } else if (flattenedIndex[1] == coords[0] + 1) {
        sum.zw += updVals.zw;
        found.zw = vec2(1.);
      }
    }
  }
  // Component-wise: keep the default value where nothing was found.
  setOutput(mix(${g}, sum, found));
}
`}};function kte(r){let{inputs:t,backend:e,attrs:o}=r,{indices:n,updates:s}=t,{shape:a}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=w.calculateShapes(s,n,a),m=[l/u,u];if(l===0)return e.makeTensorInfo(a,n.dtype);let d=te({inputs:{x:n},backend:e,attrs:{shape:[p,i]}}),f=te({inputs:{x:s},backend:e,attrs:{shape:[p,u]}}),h=e.makeTensorInfo([],"float32",new Float32Array([0])),g;A().getBool("WEBGL_PACK")?g=new Tg(p,i,d.shape.length,f.shape.length,c,m):g=new yu(p,i,d.shape.length,f.shape.length,c,m);let x=e.runWebGLProgram(g,[f,d,h],f.dtype),b=te({inputs:{x},backend:e,attrs:{shape:a}});return e.disposeIntermediateTensorInfo(d),e.disposeIntermediateTensorInfo(f),e.disposeIntermediateTensorInfo(x),e.disposeIntermediateTensorInfo(h),b}var D3={kernelName:ms,backendName:"webgl",kernelFunc:kte};var _g=class{constructor(t,e,o,n){this.variableNames=["sortedSequence","values"],this.customUniforms=[{name:"numInputs",type:"int"}],this.outputShape=[t,o];let s="while (left < right) {",a=`for (int i = 0; i < ${Math.ceil(Math.log2(e+1))}; ++i) { if (left >= right) break;`,i=A().getNumber("WEBGL_VERSION")===2?s:a,p=n==="left"?"<":"<=";this.userCode=`
// Binary search over one row of sortedSequence.
// Returns the insertion position of value within [0, numInputs]. The loop
// header and the comparison operator are generated by the host: a while
// loop or a bounded for loop with an early break, and "<" for side "left"
// or "<=" otherwise.
int findBound(int batch, float value) {
  int left = 0;
  int right = numInputs;
  int mid;
  ${i}
    mid = (left + right) / 2;
    if (getSortedSequence(batch, mid) ${p} value) {
      left = mid + 1;
    } else {
      right = mid;
    }
  }
  return right;
}
// Entry point: for each (batch, valueIndex) output cell, looks up the value
// and writes the position returned by findBound as a float.
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int valueIndex = coords[1];
float value = getValues(batch, valueIndex);
setOutput(float(findBound(batch, value)));
}
`}};function Nte(r){let{inputs:t,backend:e,attrs:o}=r,{sortedSequence:n,values:s}=t,{side:a}=o,i=new _g(n.shape[0],n.shape[1],s.shape[1],a),p=[[n.shape[1]]];return e.runWebGLProgram(i,[n,s],"int32",p)}var A3={kernelName:fs,backendName:"webgl",kernelFunc:Nte};var $g=class{constructor(t,e,o){this.variableNames=["c","a","b"],this.outputShape=e;let n,s;if(o>4)throw Error(`Where for rank ${o} is not yet supported`);if(o===1)s="resRC",n="resRC";else{let i=["resRC.x","resRC.y","resRC.z","resRC.w"],p=[],u=[];for(let c=0;c<e.length;c++)u.push(`${i[c]}`),c<t&&p.push(`${i[c]}`);n=p.join(),s=u.join()}let a=Re(o);this.userCode=`
// Select shader entry point: writes A where the condition value is >= 1.0
// and B otherwise. The coordinate lists passed to getC / getA / getB are
// generated by the host.
void main() {
  ${a} resRC = getOutputCoords();
  float cVal = getC(${n});
  if (cVal >= 1.0) {
    setOutput(getA(${s}));
  } else {
    setOutput(getB(${s}));
  }
}
`}};function Tte(r){let{inputs:t,backend:e}=r,{condition:o,t:n,e:s}=t,a=new $g(o.shape.length,n.shape,n.shape.length);return e.runWebGLProgram(a,[o,n,s],dt(n.dtype,s.dtype))}var F3={kernelName:fa,backendName:"webgl",kernelFunc:Tte};var _te=`
// SELU: scale * x for x >= 0, scaleAlpha * (exp(x) - 1) otherwise.
// Stable and Attracting Fixed Point (0, 1) for Normalized Weights.
// see: https://arxiv.org/abs/1706.02515
float scaleAlpha = ${w.SELU_SCALEALPHA};
float scale = ${w.SELU_SCALE};
return (x >= 0.0) ? scale * x : scaleAlpha * (exp(x) - 1.0);
`,$te=xe({opSnippet:_te}),P3={kernelName:hs,backendName:"webgl",kernelFunc:$te};var Ete=Fo+`
// Logistic sigmoid: 1 / (1 + e^-x).
return 1.0 / (1.0 + exp(-1.0 * x));
`,Rte=`
// Packed logistic sigmoid over four lanes.
vec4 result = 1.0 / (1.0 + exp(-1.0 * x));
// Pass NaN inputs through unchanged, lane by lane.
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,Dte=xe({opSnippet:Ete,packedOpSnippet:Rte,cpuKernelImpl:pD}),O3={kernelName:bs,backendName:"webgl",kernelFunc:Dte};var Ate=`
// Sign of x; NaN inputs yield 0.0 instead of being passed to sign().
if (isnan(x)) { return 0.0; }
return sign(x);
`,Fte=xe({opSnippet:Ate}),M3={kernelName:ys,backendName:"webgl",kernelFunc:Fte};var Pte=Fo+`
// Sine of x (the prefix concatenated by the host runs before this line).
return sin(x);
`,Ote=`
// Packed sine over four lanes.
vec4 result = sin(x);
bvec4 isNaN = isnan(x);
// Host-supplied fragment that consumes isNaN / result.
${jr}
return result;
`,Mte=xe({opSnippet:Pte,packedOpSnippet:Ote}),L3={kernelName:gs,backendName:"webgl",kernelFunc:Mte};var Lte=`
// Hyperbolic sine: (e^x - e^-x) / 2. Note that e2x holds exp(x), so
// 1.0 / e2x is exp(-x).
float e2x = exp(x);
return (e2x - 1.0 / e2x) / 2.0;
`,Bte=xe({opSnippet:Lte}),B3={kernelName:xs,backendName:"webgl",kernelFunc:Bte};var zte=`
// Softplus, log(exp(x) + 1), with shortcuts for the two saturated ranges.
// epsilon is 2^-23; threshold = log(epsilon) + 2.0 is negative.
float epsilon = 1.1920928955078125e-7;
float threshold = log(epsilon) + 2.0;
bool too_large = x > -threshold;
bool too_small = x < threshold;
float result;
float exp_x = exp(x);
if (too_large){
  // For large x the result is x itself.
  result = x;
}
else if (too_small){
  // For very negative x the result is exp(x).
  result = exp_x;
}
else{
  result = log(exp_x + 1.0);
}
return result;
`,Vte=xe({opSnippet:zte}),z3={kernelName:Cs,backendName:"webgl",kernelFunc:Vte};var Wte=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{blockShape:s,paddings:a}=o;y.assert(n.shape.length<=4,()=>"spaceToBatchND for rank > 4 with a WebGL backend not implemented yet");let i=s.reduce((x,b)=>x*b),p=[[0,0]];p.push(...a);for(let x=1+s.length;x<n.shape.length;++x)p.push([0,0]);let u=[],c=Rv({inputs:{x:n},backend:e,attrs:{paddings:p,constantValue:0}}),l=w.getReshaped(c.shape,s,i,!1),m=w.getPermuted(l.length,s.length,!1),d=w.getReshapedPermuted(c.shape,s,i,!1),f=te({inputs:{x:c},backend:e,attrs:{shape:l}}),h=bt({inputs:{x:f},backend:e,attrs:{perm:m}}),g=te({inputs:{x:h},backend:e,attrs:{shape:d}});return u.push(c),u.push(f),u.push(h),u.forEach(x=>e.disposeIntermediateTensorInfo(x)),g},V3={kernelName:ga,backendName:"webgl",kernelFunc:Wte};function Ute(r){let{inputs:t,backend:e}=r,{indices:o,values:n,denseShape:s,defaultValue:a}=t;if(s.shape.length!==1)throw new Error(`Dense shape must be a vector, saw:
2022-11-18 17:13:29 +01:00
${s.shape}`);if(o.shape.length!==2)throw new Error(`Indices must be a matrix, saw:
${o.shape}`);if(n.shape.length!==1)throw new Error(`Values must be a vector, saw:
${n.shape}`);if(a.shape.length!==0)throw new Error(`Default value must be a scalar, saw:
${a.shape}`);let i=e.readSync(o.dataId),p=e.readSync(n.dataId),u=e.readSync(s.dataId),c=e.readSync(a.dataId)[0],[l,m,d,f,h]=lD(i,o.shape,o.dtype,p,n.dtype,u,c);return[e.makeTensorInfo(m,o.dtype,l),e.makeTensorInfo([m[0]],n.dtype,d),e.makeTensorInfo([f.length],"bool",new Uint8Array(f.map(g=>Number(g)))),e.makeTensorInfo([h.length],o.dtype,new Int32Array(h))]}var W3={kernelName:Hi,backendName:"webgl",kernelFunc:Ute};function Gte(r){let{inputs:t,backend:e}=r,{inputIndices:o,inputShape:n,newShape:s}=t;if(o.shape.length!==2)throw new Error(`Input indices should be a matrix but received shape ${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape ${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=Array.from(e.readSync(n.dataId)),i=e.readSync(o.dataId),p=Array.from(e.readSync(s.dataId)),[u,c,l]=mD(i,o.shape,o.dtype,a,p);return[e.makeTensorInfo(c,o.dtype,u),e.makeTensorInfo([l.length],s.dtype,new Int32Array(l))]}var U3={kernelName:ei,backendName:"webgl",kernelFunc:Gte};function Hte(r){let{inputs:t,backend:e}=r,{data:o,indices:n,segmentIds:s}=t;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);let a=e.readSync(o.dataId),i=e.readSync(n.dataId),p=e.readSync(s.dataId),[u,c]=ch(a,o.shape,o.dtype,i,p,!0);return e.makeTensorInfo(c,o.dtype,u)}var G3={kernelName:ya,backendName:"webgl",kernelFunc:Hte};function Kte(r){let{inputs:t,backend:e}=r,{data:o,indices:n,segmentIds:s}=t;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);let a=e.readSync(o.dataId),i=e.readSync(n.dataId),p=e.readSync(s.dataId),[u,c]=ch(a,o.shape,o.dtype,i,p);return e.makeTensorInfo(c,o.dtype,u)}var H3={kernelName:ba,backendName:"webgl",kernelFunc:Kte};function qte(r){let{inputs:t,backend:e,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=t,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=w.calculateShapes(s,n,i),d=!1;if(s.dtype==="string"){let x=e.bufferSync(n),b=e.bufferSync(s),C=y.decodeString(e.readSync(a.dataId)[0]),S=uD(x,b,i,m,c,u,p,l,C,d);return e.makeTensorInfo(i,S.dtype,S.values)}let f=new yu(u,p,n.shape.length,s.shape.length,l,[m,1],d),h=e.runWebGLProgram(f,[s,n,a],s.dtype),g=te({inputs:{x:h},backend:e,attrs:{shape:i}});return e.disposeIntermediateTensorInfo(h),g}var K3={kernelName:vs,backendName:"webgl",kernelFunc:qte};function jte(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{numOrSizeSplits:s,axis:a}=o,i=y.parseAxisParam(a,n.shape)[0],p=w.prepareSplitSize(n,s,i),u=n.shape.length,c=new Array(u).fill(0),l=n.shape.slice();return p.map(m=>{let d=[...l];d[i]=m;let f=Gs({inputs:{x:n},backend:e,attrs:{begin:c,size:d}});return c[i]+=m,f})}var q3={kernelName:xa,backendName:"webgl",kernelFunc:jte};var j3="return sqrt(x);",Xte=xe({opSnippet:j3,packedOpSnippet:j3,cpuKernelImpl:dD}),X3={kernelName:ws,backendName:"webgl",kernelFunc:Xte};var Yte="return x * x;",Qte=xe({opSnippet:Yte}),Y3={kernelName:Ki,backendName:"webgl",kernelFunc:Qte};var Q3="return (a - b) * (a - b);",Zte=nt({opSnippet:Q3,packedOpSnippet:Q3}),Z3={kernelName:ks,backendName:"webgl",kernelFunc:Zte};function Jte(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t;if(n.dtype!=="string")throw new Error("Input must be of datatype string");let s=e.readSync(n.dataId),a=w.fromUint8ToStringArray(s),i=fD(a,"string",o);return e.makeTensorInfo(n.shape,"string",i)}var J3={kernelName:$u,backendName:"webgl",kernelFunc:Jte};function ere({inputs:r,attrs:t,backend:e}){let{x:o}=r,n=Wt+`
// Step: 1.0 for positive x, otherwise the alpha attribute baked in by the
// host.
return x > 0.0 ? 1.0 : float(${t.alpha});
`,s=new tr(o.shape,n);return e.runWebGLProgram(s,[o],o.dtype)}var eP={kernelName:wo,backendName:"webgl",kernelFunc:ere};var Eg=class{constructor(t,e,o){this.variableNames=["x"],this.outputShape=o;let n=o.length,s=Re(o.length),a=Re(o.length),i="";if(n===1)i="coords * strides + begin";else{let p=0;i=o.map((u,c)=>(p++,o.length===1?`coords * strides[${c}] + begin[${c}]`:`coords[${p-1}] * strides[${c}] + begin[${c}]`)).join(",")}this.userCode=`
// Slice origin and per-axis step, baked in by the host.
${s} begin = ${s}(${t});
${s} strides = ${s}(${e});
// Strided-slice entry point: each output coordinate reads the input at
// coords * strides + begin (the argument list is generated by the host).
void main() {
  ${a} coords = getOutputCoords();
  setOutput(getX(${i}));
}
`}};function tre(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:d,finalShape:f,isIdentity:h,sliceDim0:g,isSimpleSlice:x,begin:b,end:C,strides:S}=pt.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=te({inputs:{x:n},backend:e,attrs:{shape:f}});else if(g||x){y.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let E=pt.computeOutShape(b,C,S),R=Gs({inputs:{x:n},backend:e,attrs:{begin:b,size:E}});k=te({inputs:{x:R},backend:e,attrs:{shape:f}}),e.disposeIntermediateTensorInfo(R)}else if(e.shouldExecuteOnCPU([n])){let R=e.readSync(n.dataId),D=me(n.shape,n.dtype,R),P=hD(d,D,S,b);k=e.makeTensorInfo(f,n.dtype,P.values)}else{let R=new Eg(b,S,d);k=e.runWebGLProgram(R,[n],n.dtype)}let _=te({inputs:{x:k},backend:e,attrs:{shape:f}});return e.disposeIntermediateTensorInfo(k),_}var tP={kernelName:Ns,backendName:"webgl",kernelFunc:tre};function rre(r){let{inputs:t,backend:e,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=t,m=e.readSync(c.dataId),d=e.readSync(l.dataId),[f,h]=gD(m,d,n,s,a,i,p,u);return[e.makeTensorInfo([f.length],"string",f),e.makeTensorInfo(l.shape,"int32",h)]}var rP={kernelName:Ca,backendName:"webgl",kernelFunc:rre};function ore(r){let{inputs:t,backend:e,attrs:o}=r,{skipEmpty:n}=o,{input:s,delimiter:a}=t;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(s.shape.length!==1)throw new Error(`Input must be a vector, got shape: ${s.shape}`);if(a.shape.length!==0)throw new Error(`Delimiter must be a scalar, got shape: ${a.shape}`);let i=e.readSync(s.dataId),p=e.readSync(a.dataId)[0],[u,c,l]=xD(i,p,n),m=c.length;return[e.makeTensorInfo([m,2],"int32",u),e.makeTensorInfo([m],"string",c),e.makeTensorInfo([2],"int32",new Int32Array(l))]}var oP={kernelName:qi,backendName:"webgl",kernelFunc:ore};function 
nre(r){let{inputs:t,backend:e,attrs:o}=r,{numBuckets:n}=o,{input:s}=t;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(n<=0)throw new Error("Number of buckets must be at least 1");let a=e.readSync(s.dataId),i=yD(a,n);return e.makeTensorInfo(s.shape,"int32",i)}var nP={kernelName:ji,backendName:"webgl",kernelFunc:nre};var sre="return tan(x);",are=xe({opSnippet:sre}),sP={kernelName:_s,backendName:"webgl",kernelFunc:are};var ire=`
// Hyperbolic tangent via exp(-2|x|): sign(x) * (1 - e) / (1 + e).
// The exponent is never positive, so exp() cannot overflow here.
float e2x = exp(-2.0 * abs(x));
return sign(x) * (1.0 - e2x) / (1.0 + e2x);
`,ure=xe({opSnippet:ire}),aP={kernelName:$s,backendName:"webgl",kernelFunc:ure};function pre(r){let{inputs:t,backend:e,attrs:o}=r,{tensor:n,indices:s,updates:a}=t,{}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=w.calculateShapes(a,s,n.shape),m=[l/u,u];if(l===0)return e.makeTensorInfo(n.shape,s.dtype);let d=te({inputs:{x:s},backend:e,attrs:{shape:[p,i]}}),f=te({inputs:{x:a},backend:e,attrs:{shape:[p,u]}}),h=te({inputs:{x:n},backend:e,attrs:{shape:m}}),g=new yu(p,i,d.shape.length,f.shape.length,c,m,!1,!0),x=e.runWebGLProgram(g,[f,d,h],h.dtype),b=te({inputs:{x},backend:e,attrs:{shape:n.shape}});return e.disposeIntermediateTensorInfo(d),e.disposeIntermediateTensorInfo(f),e.disposeIntermediateTensorInfo(h),e.disposeIntermediateTensorInfo(x),b}var iP={kernelName:ds,backendName:"webgl",kernelFunc:pre};var Rg=class{constructor(t,e){this.variableNames=["A"];let o=new Array(t.length);for(let a=0;a<o.length;a++)o[a]=t[a]*e[a];this.outputShape=o,this.rank=o.length;let n=Re(this.rank),s=cre(t);this.userCode=`
// Tile entry point: every output coordinate is wrapped back into the input
// with imod(coord, inputDim); the argument list is generated by the host.
void main() {
  ${n} resRC = getOutputCoords();
  setOutput(getA(${s}));
}
`}};function cre(r){let t=r.length;if(t>5)throw Error(`Tile for rank ${t} is not yet supported`);if(t===1)return`imod(resRC, ${r[0]})`;let e=["resRC.x","resRC.y","resRC.z","resRC.w","resRC.u"],o=[];for(let n=0;n<r.length;n++)o.push(`imod(${e[n]}, ${r[n]})`);return o.join()}function Av(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{reps:s}=o;if(n.dtype==="string"||n.shape.length>5){let p=e.readSync(n.dataId),u=n.dtype==="string"?p.map(m=>y.decodeString(m)):p,c=me(n.shape,n.dtype,u),l=CD(c,s);return e.makeTensorInfo(l.shape,l.dtype,l.values)}let a=new Rg(n.shape,s);return e.runWebGLProgram(a,[n],n.dtype)}var uP={kernelName:uo,backendName:"webgl",kernelFunc:Av};var Dg=class{constructor(t){this.variableNames=["x","indices"],this.customUniforms=[{name:"n",type:"int"},{name:"firstPass",type:"int"},{name:"negativeInf",type:"float"},{name:"dir",type:"int"},{name:"inc",type:"int"}],this.outputShape=t,this.userCode=`
// Top-k swap pass: compares the pair (i, i + inc) and writes the index that
// belongs at this position for the current sort direction.
// Uniforms: n, firstPass, negativeInf, dir, inc.
void main() {
  ivec2 coords = getOutputCoords();
  int batch = coords[0];
  int elemIdx = coords[1];
  // We compare elements pair-wise within a group of size 2 * inc.
  // The comparing rule for each group alternates between ascending
  // and descending. Within each group, we compare each pair at
  // positions i and i+inc. To decide whether an element at position i
  // is x0 or x1, we mod it by 2 * inc, if the result is smaller than
  // inc, it is in the first half of the group, we denote it as x0,
  // otherwise we denote it as x1.
  // For example, as shown in the Bitonic top K paper referenced above,
  // Figure5(a) shows that element[1] is in the
  // second half of the group when group size is 2, but it is in the
  // first half of the group when group size is 4.
  bool isFirstInPair = imod(elemIdx, 2 * inc) < inc;
  int i = isFirstInPair ? elemIdx : elemIdx - inc;
  // On the first pass positions are their own indices; afterwards the
  // indices come from the previous pass.
  int i0 = firstPass == 1 ? i : int(getIndices(batch, i));
  int i1 = firstPass == 1 ? i + inc : int(getIndices(batch, i + inc));
  // Indices at or beyond n are treated as negativeInf.
  float x0 = i0 < n ? getX(batch, i0) : negativeInf;
  float x1 = i1 < n ? getX(batch, i1) : negativeInf;
  // Denotes which direction indices are in (ascending or descending).
  bool reverse = imod(elemIdx, 2 * dir) >= dir;
  // Ties are broken by index.
  bool isGreater = x0 > x1 || (x0 == x1 && i1 > i0);
  if (reverse == isGreater) { // Elements in opposite order of direction
    int iTemp = i0;
    i0 = i1;
    i1 = iTemp;
  }
  if (isFirstInPair) {
    setOutput(float(i0));
  } else {
    setOutput(float(i1));
  }
}
`}},Ag=class{constructor(t){this.variableNames=["x","indices"],this.customUniforms=[{name:"n",type:"int"},{name:"firstPass",type:"int"},{name:"k",type:"int"}],this.outputShape=t,this.userCode=`
// Top-k merge pass: halves the sequence by keeping, for each pair
// (i, i + k), the index of the larger value.
// Uniforms: n, firstPass, k.
void main() {
  // Takes max of indices (0, k), (1, k + 1), (2, k + 2) ...
  ivec2 coords = getOutputCoords();
  int batch = coords[0];
  int elemIdx = coords[1];
  // The output size is half of the previous size.
  // If the previous sequence is | | | | _ _ _ _ | | | | _ _ _ _ (k=4),
  // we only need to output the indices at positions |, the indices at
  // positions _ can be thrown away, see Figure5(b) After Phase 2
  // (Merge phase) in the Bitonic Top K paper referenced above.
  // For example, the paper shows we only need to output the orange bars.
  // The output sequence should look like this | | | | | | | |.
  // Because the sequence is halved, to map the output index back
  // to the previous sequence to find the corresponding value,
  // we need to double the index. When we double the index,
  // we basically interpolate a position, so 2i looks like
  // | _ | _ | _ | _ | _ | _ | _. We move the | to the first k position
  // of each 2k positions by - elemIdx % k. E.g. for output at
  // index 4,5,6,7, we want to get the corresponding element at
  // original index 8,9,10,11, for output at index 8,9,10,11,
  // we want to get the corresponding element at original index
  // 16,17,18,19, so on and so forth.
  int i = elemIdx < k ? elemIdx : (elemIdx * 2 - imod(elemIdx, k));
  int i0 = firstPass == 1 ? i : int(getIndices(batch, i));
  int i1 = firstPass == 1 ? i + k : int(getIndices(batch, i + k));
  float x0 = getX(batch, i0);
  // An out-of-range partner falls back to x0, so i0 wins the comparison.
  float x1 = i1 < n ? getX(batch, i1) : x0;
  setOutput(x0 >= x1 ? float(i0) : float(i1));
}
`}};function Ip(r,t){t!==null&&r.disposeIntermediateTensorInfo(t)}function pP(r){let t=1;for(;t<r;)t*=2;return t}function lre(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{k:s,sorted:a}=o,i=A().getNumber("TOPK_LAST_DIM_CPU_HANDOFF_SIZE_THRESHOLD"),p=A().getNumber("TOPK_K_CPU_HANDOFF_THRESHOLD"),u=n.shape,c=u[u.length-1];if(e.shouldExecuteOnCPU([n])||c<i||s>p){let P=e.readSync(n.dataId),[O,M]=wD(P,u,n.dtype,s,a);return[e.makeTensorInfo(O.shape,O.dtype,O.values),e.makeTensorInfo(M.shape,M.dtype,M.values)]}if(s===0)return u[u.length-1]=0,[e.makeTensorInfo(u,n.dtype,[]),e.makeTensorInfo(u,"int32",[])];if(c===1)return[n,bi({attrs:{shape:u,dtype:"int32",value:0},backend:e})];let l=e.texData.get(n.dataId),m=l!==null&&l.isPacked,d=m?e.unpackTensor(n):n,h=y.sizeFromShape(u)/c,g=te({inputs:{x:d},attrs:{shape:[h,c]},backend:e});m&&Ip(e,d);let x=pP(s),b=pP(c),C=null,S=()=>C===null?[g,g]:[g,C],k=(P,O,M)=>{let L=S(),B=new Dg(M),U=[[c],[C===null?1:0],[Number.NEGATIVE_INFINITY],[P],[O]],j=C;C=e.runWebGLProgram(B,L,"int32",U),Ip(e,j)};for(let P=1;P<x;P*=2){let O=P*2;for(let M=P;M>=1;M/=2)k(O,M,[h,b])}for(let P=b;P>x;P/=2){let O=S(),M=new Ag([h,P/2]),B=[[c],[C===null?1:0],[x]],z=C;C=e.runWebGLProgram(M,O,"int32",B),Ip(e,z);let U=x/2,j=U*2;for(let q=U;q>=1;q/=2)k(j,q,C.shape)}let _=C;C=Gs({inputs:{x:C},backend:e,attrs:{begin:0,size:[h,s]}}),Ip(e,_);let E=Nv({inputs:{x:g,indices:C},backend:e,attrs:{axis:1,batchDims:1}});Ip(e,g);let R=u.slice(0,-1);R.push(s),_=C,C=te({inputs:{x:C},attrs:{shape:R},backend:e}),Ip(e,_);let D=E;return E=te({inputs:{x:E},attrs:{shape:R},backend:e}),Ip(e,D),[E,C]}var cP={kernelName:Es,backendName:"webgl",kernelFunc:lre};var Fg=class{constructor(t,e,o,n,s,a){this.variableNames=["Image","Transforms"],this.outputShape=a;let i=o==="nearest"?1:2,p;switch(n){case"constant":p=1;break;case"reflect":p=2;break;case"wrap":p=3;break;case"nearest":p=4;break;default:p=1;break}this.userCode=`
// Maps an output-space coordinate into [0, len - 1] according to the fill
// mode constant baked in by the host:
//   2 = reflect, 3 = wrap, 4 = nearest (clamp), anything else = unchanged.
float mapCoord(float outCoord, float len) {
  float inCoord = outCoord;
  if(${p} == 2) {
    // Reflect.
    if (inCoord < 0.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz2 = 2.0 * len;
        if (inCoord < sz2) {
          inCoord = sz2 * float(int(float(-inCoord / sz2))) +
            inCoord;
        }
        inCoord = inCoord < -len ? inCoord + sz2 : -inCoord - 1.0;
      }
    } else if (inCoord > len - 1.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz2 = 2.0 * len;
        inCoord -= sz2 * float(int(float(inCoord / sz2)));
        if (inCoord >= len) {
          inCoord = sz2 - inCoord - 1.0;
        }
      }
    }
    return clamp(inCoord, 0.0, len - 1.0);
  } else if (${p} == 3) {
    // Wrap.
    if (inCoord < 0.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz = len - 1.0;
        inCoord += len * (float(int(float(-inCoord / sz))) + 1.0);
      }
    } else if (inCoord > len - 1.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz = len - 1.0;
        inCoord -= len * float(int(float(inCoord / sz)));
      }
    }
    return clamp(inCoord, 0.0, len - 1.0);
  } else if (${p} == 4) {
    // Nearest: clamp to the valid range.
    return clamp(outCoord, 0.0, len - 1.0);
  } else {
    // Constant mode: leave the coordinate alone; out-of-range reads are
    // handled by readWithFillValue.
    return outCoord;
  }
}
// Reads Image at (batch, coordY, coordX, channel) when the position lies
// inside the image bounds baked in by the host; otherwise returns the fill
// value.
float readWithFillValue(int batch, int coordY, int coordX,
    int channel) {
  float outputValue;
  if (0 <= coordY && coordY < ${t} && 0 <= coordX && coordX < ${e}) {
    outputValue = getImage(batch, coordY, coordX, channel);
  } else {
    outputValue = float(${s});
  }
  return outputValue;
}
// Projective image transform entry point.
// Reads the eight transform coefficients for this batch, maps the output
// pixel (x, y) to a source position, and samples the image with either
// nearest or bilinear interpolation (mode constant baked in by the host:
// 1 = nearest, otherwise bilinear).
void main() {
  ivec4 coords = getOutputCoords();
  float outputValue;
  int batch = coords[0];
  int x = coords[2];
  int y = coords[1];
  int channel = coords[3];
  float xf = float(x);
  float yf = float(y);
  float a1 = getTransforms(batch, 0);
  float a2 = getTransforms(batch, 1);
  float a3 = getTransforms(batch, 2);
  float b1 = getTransforms(batch, 3);
  float b2 = getTransforms(batch, 4);
  float b3 = getTransforms(batch, 5);
  float c1 = getTransforms(batch, 6);
  float c2 = getTransforms(batch, 7);
  float projection = c1 * xf + c2 * yf + 1.0;
  if (projection == 0.0) {
    // Avoid dividing by zero: emit the fill value.
    outputValue = float(${s});
  } else {
    float inX = (a1 * xf + a2 * yf + a3) / projection;
    float inY = (b1 * xf + b2 * yf + b3) / projection;
    float mapX = mapCoord(inX, float(${e}));
    float mapY = mapCoord(inY, float(${t}));
    if (${i} == 1) {
      // Nearest interpolation.
      int coordY = int(round(mapY));
      int coordX = int(round(mapX));
      outputValue = readWithFillValue(batch, coordY, coordX,
        channel);
    } else {
      // Bilinear interpolation over the four surrounding pixels.
      float yFloor = floor(mapY);
      float xFloor = floor(mapX);
      float yCeil = yFloor + 1.0;
      float xCeil = xFloor + 1.0;
      float valueYFloor = (xCeil - mapX) *
        readWithFillValue(batch, int(yFloor), int(xFloor), channel) +
        (mapX - xFloor) *
        readWithFillValue(batch, int(yFloor), int(xCeil), channel);
      float valueYCeil = (xCeil - mapX) *
        readWithFillValue(batch, int(yCeil), int(xFloor), channel) +
        (mapX - xFloor) *
        readWithFillValue(batch, int(yCeil), int(xCeil), channel);
      outputValue = (yCeil - mapY) * valueYFloor +
        (mapY - yFloor) * valueYCeil;
    }
  }
  setOutput(outputValue);
}
`}};function mre(r){let{inputs:t,backend:e,attrs:o}=r,{image:n,transforms:s}=t,{interpolation:a,fillMode:i,fillValue:p,outputShape:u}=o,[c,l,m,d]=n.shape,[f,h]=u!=null?u:[l,m],g=[c,f,h,d],x=new Fg(l,m,a,i,p,g);return e.runWebGLProgram(x,[n,s],"float32")}var lP={kernelName:Rs,backendName:"webgl",kernelFunc:mre};function dre(r){let{inputs:t,attrs:e,backend:o}=r,{axis:n}=e,{x:s}=t;Vs(s,"unique"),console.warn("WARNING: ","UI might be locked temporarily as data is being downloaded");let a=o.readSync(s.dataId),{outputValues:i,outputShape:p,indices:u}=SD(a,n,s.shape,s.dtype);return[o.makeTensorInfo(p,s.dtype,i),o.makeTensorInfo([u.length],"int32",u)]}var mP={kernelName:Xi,backendName:"webgl",kernelFunc:dre};function fre(r){let{inputs:t,backend:e,attrs:o}=r,{value:n}=t,{axis:s}=o;s<0&&(s+=n.shape.length);let a=n,i=a.shape.length,p=n.shape[s],u=new Array(i-1),c=0;for(let h=0;h<i;h++)h!==s&&(u[c++]=a.shape[h]);let l=[],m=new Array(i).fill(0),d=a.shape.slice();d[s]=1;let f=new Array(p);for(let h=0;h<f.length;h++){m[s]=h;let g=Gs({inputs:{x:a},backend:e,attrs:{begin:m,size:d}}),x=te({inputs:{x:g},backend:e,attrs:{shape:u}});f[h]=x,l.push(g)}return l.forEach(h=>e.disposeIntermediateTensorInfo(h)),f}var dP={kernelName:wa,backendName:"webgl",kernelFunc:fre};var Pg=class{constructor(t,e){this.variableNames=["x","segmentIds"];let o=t.windowSize,n=t.batchSize,s=t.inSize,a=t.numSegments,i=a*Math.ceil(s/o);this.outputShape=[n,i];let p="0.0",u="sumValue",c=Math.floor(o/4)*4,l=o%4,m=`
2022-11-18 17:13:29 +01:00
sumValue += dot(values, segFilter);
2022-11-20 22:20:02 +01:00
`,d="";s%o>0&&(d=`
2022-11-18 17:13:29 +01:00
if (inIdx < 0 || inIdx >= ${s}) {
return initializationValue;
}
2022-11-20 22:20:02 +01:00
`);let f="";s%o>0&&(f=`
2022-11-18 17:13:29 +01:00
if (inIdx < 0 || inIdx >= ${s}) {
return -1.0;
}
`),this.userCode=`
const float initializationValue = ${p};
// Fetches x[batch, inIdx]. When the input size is not a multiple of the
// window size, the interpolated guard below returns initializationValue for
// indices outside the input.
float getValue(int batch, int inIdx) {
  ${d}
  return getX(batch, inIdx);
}
// Fetches the segment id stored at inIdx. When the input size is not a
// multiple of the window size, the interpolated guard below returns -1.0 for
// indices outside the input.
float getSegmentIdAtIndex(int inIdx) {
  ${f}
  return getSegmentIds(inIdx);
}
// Shader entry point. Each output element covers one window of the input and
// one segment id: it sums the window's values whose segment id equals
// currentSeg. The window is consumed four values at a time, followed by a
// tail of one to three values selected at shader-generation time.
void main() {
  ivec2 coords = getOutputCoords();
  int batch = coords[0];
  int outIdx = coords[1];
  // Window start: (outIdx / numSegments) windows into the input.
  int inOffset = int(floor(float(outIdx) / float(
    ${a})) * float(${o}));
  int currentSeg = int(mod(float(outIdx), float(${a})));
  float sumValue = 0.0;
  for (int i = 0; i < ${c}; i += 4) {
    int inIdx = inOffset + i;
    vec4 values = vec4(
      getValue(batch, inIdx),
      getValue(batch, inIdx + 1),
      getValue(batch, inIdx + 2),
      getValue(batch, inIdx + 3)
    );
    // 1 where the element belongs to currentSeg, 0 elsewhere.
    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 2)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 3)) == currentSeg ? 1 : 0
    );
    ${m}
  }
  // Tail of the window: exactly one of the branches below is generated as
  // true when the window size is not a multiple of four.
  int inIdx = inOffset + ${c};
  if (${l===1}) {
    vec4 values = vec4(
      getValue(batch, inIdx),
      initializationValue,
      initializationValue,
      initializationValue
    );
    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      0,
      0,
      0
    );
    ${m}
  } else if (${l===2}) {
    vec4 values = vec4(
      getValue(batch, inIdx),
      getValue(batch, inIdx + 1),
      initializationValue,
      initializationValue
    );
    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
      0,
      0
    );
    ${m}
  } else if (${l===3}) {
    vec4 values = vec4(
      getValue(batch, inIdx),
      getValue(batch, inIdx + 1),
      getValue(batch, inIdx + 2),
      initializationValue
    );
    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 2)) == currentSeg ? 1 : 0,
      0
    );
    ${m}
  }
  setOutput(${u});
}
`}};function hre(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,segmentIds:s}=t,{numSegments:a}=o,i=n.shape.length,p=[],u=0,c=w.getAxesPermutation([u],i),l=n;c!=null&&(l=bt({inputs:{x:n},backend:e,attrs:{perm:c}}),p.push(l),u=w.getInnerMostAxes(1,i)[0]);let m=w.segment_util.computeOutShape(l.shape,u,a),d=y.sizeFromShape([l.shape[u]]),f=te({inputs:{x:l},backend:e,attrs:{shape:[-1,d]}});p.push(f);let h=oi(n.dtype),g=(S,k,_,E,R)=>{let D=S.shape[0],P=S.shape[1],O=w.segment_util.segOpComputeOptimalWindowSize(P,R),M={windowSize:O,inSize:P,batchSize:D,numSegments:R},L=new Pg(M,k),B=e.compileAndRun(L,[S,_],E);if(p.push(B),B.shape[1]===R)return B;let z=Dv({backend:e,attrs:{start:0,stop:R,step:1,dtype:"float32"}}),U=Av({inputs:{x:z},backend:e,attrs:{reps:[P/O]}});return p.push(z),p.push(U),g(B,k,U,E,R)},x=g(f,"unsortedSegmentSum",s,h,a),b=te({inputs:{x},backend:e,attrs:{shape:m}}),C=b;if(c!=null){p.push(b);let S=w.getUndoAxesPermutation(c);C=bt({inputs:{x:C},backend:e,attrs:{perm:S}})}return p.forEach(S=>e.disposeIntermediateTensorInfo(S)),C}var fP={kernelName:Yi,backendName:"webgl",kernelFunc:hre};var gre=[YD,ZD,JD,eA,rA,oA,nA,sA,uA,pA,cA,lA,mA,dA,fA,hA,gA,xA,yA,bA,CA,SA,IA,vA,kA,$A,RA,DA,zD,FA,OA,MA,LA,BA,zA,VA,WA,UA,GA,HA,jA,XA,YA,QA,ZA,JA,eF,tF,rF,oF,nF,sF,aF,iF,uF,pF,lF,mF,dF,fF,gF,xF,yF,bF,CF,wF,SF,IF,vF,BD,kF,PA,NF,TF,_F,VD,$F,EF,RF,DF,AF,FF,PF,OF,MF,LF,zF,VF,WF,UF,GF,HF,qF,XF,YF,QF,ZF,JF,n3,GD,s3,a3,i3,u3,NA,p3,m3,d3,f3,h3,WD,g3,x3,y3,b3,C3,TA,e3,w3,S3,I3,KD,v3,k3,N3,T3,_3,$3,E3,R3,D3,A3,F3,P3,O3,M3,L3,B3,wA,o3,z3,V3,W3,U3,G3,H3,K3,q3,X3,Y3,Z3,J3,eP,tP,rP,oP,nP,r3,jD,sP,aP,iP,uP,cP,lP,XD,mP,dP,fP,c3];for(let r of gre)ti(r);var we;(function(r){r[r.float32=0]="float32",r[r.int32=1]="int32",r[r.bool=2]="bool",r[r.string=3]="string",r[r.complex64=4]="complex64"})(we||(we={}));var bu;(function(r){r[r.linear=0]="linear",r[r.relu=1]="relu",r[r.relu6=2]="relu6",r[r.prelu=3]="prelu",r[r.leakyrelu=4]="leakyrelu",r[r.sigmoid=5]="sigmoid",r[r.elu=6]="elu"})(bu||(bu={}));var 
hP;function xre(r){hP=r.wasm.cwrap(So,null,["number","array","number","number","array","number","number","number","number","number","number","number","number"])}function yre(r){let{inputs:t,backend:e,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=t;if(n.dtype!=="float32"||s.dtype!=="float32")throw new Error("_FusedMatMul for non non-float32 tensors not yet supported.");let{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o,m=e.dataIdMap.get(n.dataId).id,d=e.dataIdMap.get(s.dataId).id,f=0;if(a!=null){let R=e.dataIdMap.get(a.dataId);if(R.shape.length!==1)throw new Error(`_FusedMatMul only supports rank-1 bias but got rank ${R.shape.length}.`);f=R.id}let h=i==null?0:e.dataIdMap.get(i.dataId).id,g=bu[c];if(g==null)throw new Error(`${c} activation not yet supported for FusedConv2D in the wasm backend.`);let x=p?n.shape[2]:n.shape[1],b=u?s.shape[1]:s.shape[2],C=Sr.assertAndGetBroadcastShape(n.shape.slice(0,-2),s.shape.slice(0,-2)),S=e.makeOutput([...C,x,b],n.dtype),k=e.dataIdMap.get(S.dataId).id,_=new Uint8Array(new Int32Array(n.shape).buffer),E=new Uint8Array(new Int32Array(s.shape).buffer);return hP(m,_,n.shape.length,d,E,s.shape.length,p,u,g,f,h,l||0,k),S}var gP={kernelName:So,backendName:"wasm",setupFunc:xre,kernelFunc:yre};function he(r,t){let e;function o(s){e=s.wasm.cwrap(r,null,["number","number","number"])}function n(s){let{backend:a,inputs:{x:i}}=s,p=a.dataIdMap.get(i.dataId).id,u=a.makeOutput(i.shape,t||i.dtype),c=a.dataIdMap.get(u.dataId).id;return y.sizeFromShape(u.shape)===0||e(p,we[i.dtype],c),u}return{kernelName:r,backendName:"wasm",setupFunc:o,kernelFunc:n}}var xP=he(Xs);var yP=he(Vo);var bP=he(Wo);function Ue(r,t,e){let o;function n(a){o=a.wasm.cwrap(r,null,["number","array","number","number","array","number","number","number"])}function 
s(a){let{backend:i,inputs:p}=a,{a:u,b:c}=p,l=i.dataIdMap.get(u.dataId).id,m=i.dataIdMap.get(c.dataId).id,d=e!=null?e:u.dtype,f=w.assertAndGetBroadcastShape(u.shape,c.shape),h=i.makeOutput(f,d);if(y.sizeFromShape(f)===0)return h;let g=new Uint8Array(new Int32Array(u.shape).buffer),x=new Uint8Array(new Int32
2022-11-18 17:13:29 +01:00
${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=t.dataIdMap.get(o.dataId).id,i=t.dataIdMap.get(n.dataId).id,p=t.dataIdMap.get(s.dataId).id,u=o.shape[0],c=y.sizeFromShape(s.shape),l=t.makeOutput([u,c],o.dtype),m=t.dataIdMap.get(l.dataId).id,d=t.makeOutput([c],s.dtype),f=t.dataIdMap.get(d.dataId).id,h=t.makeOutput([3],"int32"),g=t.dataIdMap.get(h.dataId).id;ZL(a,i,p,u,m,f,g);let x=t.readSync(h.dataId),b;switch(x[0]){case 0:{b=w.getSparseReshapeMultipleNegativeOneOutputDimErrorMessage(x[1],x[2]);break}case 1:{b=w.getSparseReshapeNegativeOutputDimErrorMessage(x[1],x[2]);break}case 2:b=w.getSparseReshapeEmptyTensorZeroOutputDimErrorMessage();break;case 3:{let C=Array.from(t.readSync(n.dataId)),S=Array.from(t.readSync(d.dataId));b=w.getSparseReshapeInputOutputMultipleErrorMessage(C,S);break}case 4:{let C=Array.from(t.readSync(n.dataId)),S=Array.from(t.readSync(d.dataId));b=w.getSparseReshapeInputOutputMismatchErrorMessage(C,S);break}default:b=""}if(t.disposeData(h.dataId),b)throw t.disposeData(l.dataId),t.disposeData(d.dataId),new Error(b);return[l,d]}var JL={kernelName:ei,backendName:"wasm",setupFunc:hse,kernelFunc:gse};var eB;function Bg(r){eB=r.wasm.cwrap("SparseSegmentReduction",null,["number","number","number","number","number","number","number","number","number"])}function zg(r,t){let{backend:e,inputs:o}=r,{data:n,indices:s,segmentIds:a}=o,i=s.shape[0],p=e.readSync(a.dataId,i-1,i)[0],c=i>0?p+1:0;if(c<0)throw new Error(w.getSparseSegmentReductionNegativeSegmentIdsErrorMessage());let l=n.shape.slice();l[0]=c;let m=e.dataIdMap.get(n.dataId).id,d=e.dataIdMap.get(s.dataId).id,f=e.dataIdMap.get(a.dataId).id,h=e.makeOutput(l,n.dtype),g=e.dataIdMap.get(h.dataId).id,x=e.makeOutput([4],"int32"),b=e.dataIdMap.get(x.dataId).id;eB(m,we[n.dtype],n.shape[0],d,f,g,b,t,0);let C=e.readSync(x.dataId),S;switch(C[0]){case 0:{S=w.getSparseSegmentReductionNegativeSegmentIdsErrorMessage();break}case 
1:{S=w.getSparseSegmentReductionNonIncreasingSegmentIdsErrorMessage();break}case 2:S=w.getSparseSegmentReductionSegmentIdOutOfRangeErrorMessage(C[1],C[2]);break;case 3:S=w.getSparseSegmentReductionIndicesOutOfRangeErrorMessage(C[1],C[2],C[3]);break;default:S=""}if(e.disposeData(x.dataId),S)throw e.disposeData(h.dataId),new Error(S);return h}function xse(r){return zg(r,!0)}var tB={kernelName:ya,backendName:"wasm",setupFunc:Bg,kernelFunc:xse};function yse(r){return zg(r,!1)}var rB={kernelName:ba,backendName:"wasm",setupFunc:Bg,kernelFunc:yse};var oB;function bse(r){oB=r.wasm.cwrap(vs,null,["number","number","number","number","number","number","number","number","array","number","number"])}function Cse(r){let{backend:t,inputs:e,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=e,{outputShape:i}=o,p=t.makeOutput(i,a.dtype);if(y.sizeFromShape(i)===0)return p;let{sliceRank:u,numUpdates:c,sliceSize:l,strides:m,outputSize:d}=w.calculateShapes(s,n,i),f=t.dataIdMap.get(n.dataId).id,h=t.dataIdMap.get(s.dataId).id,g=t.dataIdMap.get(a.dataId).id,x=new Uint8Array(new Int32Array(m).buffer),b=t.dataIdMap.get(p.dataId).id;return oB(f,h,s.shape.length,g,we[a.dtype],u,c,l,x,d,b),p}var nB={kernelName:vs,backendName:"wasm",setupFunc:bse,kernelFunc:Cse};function wse(r){let{inputs:t,attrs:e,backend:o}=r,{x:n}=t,{numOrSizeSplits:s,axis:a}=e,i=y.parseAxisParam(a,n.shape)[0],p=w.prepareSplitSize(n,s,i),u=new Array(n.shape.length).fill(0),c=n.shape.slice();return p.map(l=>{let m=[...c];m[i]=l;let d=Po({inputs:{x:n},attrs:{begin:u,size:m},backend:o});return u[i]+=l,d})}var sB={kernelName:xa,backendName:"wasm",kernelFunc:wse};var aB=he(ws);var iB=he(Ki);var Sse=!0,uB=Ue(ks,Sse);var pB;function Ise(r){pB=r.wasm.cwrap(wo,null,["number","number","number","number"])}function vse(r){let{backend:t,inputs:e,attrs:o}=r,{alpha:n}=o,{x:s}=e,a=t.dataIdMap.get(s.dataId).id,i=t.makeOutput(s.shape,s.dtype),p=t.dataIdMap.get(i.dataId).id;return pB(a,n,we[s.dtype],p),i}var 
cB={kernelName:wo,backendName:"wasm",setupFunc:Ise,kernelFunc:vse};var lB;function kse(r){lB=r.wasm.cwrap(Ns,null,["number","array","
2023-01-06 19:23:06 +01:00
{
var oldValue = 0;
loop {
2023-08-05 15:03:11 +02:00
let newValueF32 = bitcast<f32>(oldValue) + (${t});
2023-01-06 19:23:06 +01:00
let newValue = bitcast<i32>(newValueF32);
let res = atomicCompareExchangeWeak(${r}, oldValue, newValue);
if res.exchanged {
break;
}
oldValue = res.old_value;
}
}`;var Ci;(function(r){r[r.FROM_PIXELS=0]="FROM_PIXELS",r[r.DRAW=1]="DRAW"})(Ci||(Ci={}));var QB=(r,t,e,o,n)=>{let s={dtype:o.dtype,shape:o.shape},a=rae(e,s,t),i=r.createShaderModule({code:a,label:t.constructor.name}),p=A().get("WEBGPU_PRINT_SHADER");if(p!==""){p=p.toLowerCase();let u=p.split(",");(p==="all"||u.some(c=>t.shaderKey.toLowerCase().includes(c)))&&(console.group(t.shaderKey),console.debug(a),console.groupEnd())}return n?r.createComputePipelineAsync({compute:{module:i,entryPoint:"_start"},label:t.constructor.name,layout:"auto"}):r.createComputePipeline({compute:{module:i,entryPoint:"_start"},label:t.constructor.name,layout:"auto"})},Ae=(r,t="f32")=>{switch(r){case 1:return`${t}`;case 2:return`vec2<${t}>`;case 3:return`vec3<${t}>`;case 4:return`vec4<${t}>`;default:throw new Error(`${r}-component ${t} is not supported.`)}};function ft(r){if(r<=1)return"i32";if(r===2)return"vec2<i32>";if(r===3)return"vec3<i32>";if(r===4)return"vec4<i32>";if(r===5)return"vec5";if(r===6)return"vec6";throw Error(`GPU for rank ${r} is not yet supported`)}function Oo(r){if(r===0)return"x";if(r===1)return"y";if(r===2)return"z";if(r===3)return"w";if(r===4)return"u";if(r===5)return"v";throw Error(`Index ${r} is not yet supported`)}function G(...r){let t;switch(r.length){case 0:t=`
2022-11-18 17:13:29 +01:00
fn main()
2023-08-05 15:03:11 +02:00
`;break;case 1:t=`
2022-11-18 17:13:29 +01:00
fn main(${r[0]} : i32)
`;break;default:throw Error("Unreachable")}return t}function jB(r,t){let e;return e=`
2023-08-05 15:03:11 +02:00
${tae(t)}
2022-11-20 22:20:02 +01:00
// Compute-shader entry point. Copies every builtin invocation input into the
// module-scope private variable of the matching name, then calls main.
fn _start(@builtin(local_invocation_id) LocalId : vec3<u32>,
@builtin(global_invocation_id) GlobalId : vec3<u32>,
@builtin(local_invocation_index) LocalIndex: u32,
@builtin(workgroup_id) WorkgroupId : vec3<u32>,
@builtin(num_workgroups) NumWorkgroups : vec3<u32>) {
localId = LocalId;
localIndex = LocalIndex;
globalId = GlobalId;
numWorkgroups = NumWorkgroups;
workgroupId = WorkgroupId;
// The generator picks main(index) or main() depending on its first argument.
${r?"main(getGlobalIndex());":"main();"};
}
2023-08-05 15:03:11 +02:00
`,e}function tae(r){return`
2023-01-06 19:23:06 +01:00
@compute @workgroup_size(${r.workgroupSize[0]}, ${r.workgroupSize[1]}, ${r.workgroupSize[2]})
2023-08-05 15:03:11 +02:00
`}function rae(r,t,e){let o=[],n=e.workgroupSize[0]*e.workgroupSize[1]*e.workgroupSize[2];if(e.outputComponent=e.outputComponent?e.outputComponent:1,o.push(`
2022-11-18 17:13:29 +01:00
var<private> localId: vec3<u32>;
2022-11-20 22:20:02 +01:00
var<private> localIndex: u32;
2022-11-18 17:13:29 +01:00
var<private> globalId: vec3<u32>;
var<private> numWorkgroups: vec3<u32>;
2022-11-20 22:20:02 +01:00
var<private> workgroupId: vec3<u32>;
2022-11-18 17:13:29 +01:00
// Only used when the y/z dimension of workgroup size is 1.
// Returns the flat invocation index: globalId.x when the dispatch itself is
// flat, otherwise the linearized workgroup id times the workgroup size plus
// the local invocation index.
fn getGlobalIndex() -> i32 {
  ${JB(e)?" return i32(globalId.x);":` return i32((workgroupId.z * numWorkgroups.x * numWorkgroups.y +
      workgroupId.y * numWorkgroups.x + workgroupId.x) * ${n}u +
      localIndex);
  `}
}
`),e.pixelsOpType!=null){let f=e.pixelsOpType===Ci.FROM_PIXELS?`@group(0) @binding(0) var<storage, read_write> result: array<${Cu(t.dtype,e.outputComponent)}>;`:`@group(0) @binding(1) var<storage, read> inBuf : array<${Cu(r[0].dtype,e.outputComponent)}>;`,h=t.shape.length===3?"vec2<i32>":"i32";o.push(`
2022-11-18 17:13:29 +01:00
struct Uniform {
2023-08-05 15:03:11 +02:00
outShapeStrides : ${h},
2022-11-18 17:13:29 +01:00
size : i32,
numChannels : i32,
2023-08-05 15:03:11 +02:00
alpha : f32,
2022-11-18 17:13:29 +01:00
};
2023-08-05 15:03:11 +02:00
${f}
2022-11-18 17:13:29 +01:00
@group(0) @binding(2) var<uniform> uniforms: Uniform;
`);let g=YB(e);return[XB,o.join(`
`),um(t.shape),e.getUserCode(),jB(g,e)].join(`
`)}let s,a,i="struct Uniforms { NAN : f32, INFINITY : f32, ";e.variableNames.forEach((f,h)=>{let g=ft(r[h].shape.length);i+=`${f.charAt(0).toLowerCase()+f.slice(1)}Shape : ${g}, `,s=r[h].shape.length-1,a=ft(s),i+=`${f.charAt(0).toLowerCase()+f.slice(1)}ShapeStrides: ${a}, `});let p=ft(t.shape.length);i+=`outShape : ${p}, `,s=t.shape.length-1,a=ft(s),i+=`
2023-08-05 15:03:11 +02:00
outShapeStrides: ${a}, `,e.size&&(i+="size : i32, "),e.uniforms&&(i+=e.uniforms),i+="};",i=cae(i),o.push(i),e.atomic?o.push(`
2022-11-18 17:13:29 +01:00
@group(0) @binding(0) var<storage, read_write> result: array<atomic<i32>>;
`):o.push(`
@group(0) @binding(0) var<storage, read_write> result: array<${Cu(t.dtype,e.outputComponent)}>;
2023-08-05 15:03:11 +02:00
`),e.variableNames.forEach((f,h)=>{o.push(`
@group(0) @binding(${1+h}) var<storage, read> ${f}: array<${e.variableComponents?Cu(r[h].dtype,e.variableComponents[h]):Cu(r[h].dtype,e.outputComponent)}>;
2023-05-08 15:12:41 +02:00
`)}),i!==""&&o.push(`
2023-08-05 15:03:11 +02:00
@group(0) @binding(${1+e.variableNames.length}) var<uniform> uniforms: Uniforms;
`);let u=iae(t.shape,e.dispatchLayout),c=[XB,o.join(`
`)+oae,um(t.shape),u,uae(t.shape.length)];e.atomic||c.push(pae(t.shape,t.dtype,e.outputComponent)),e.variableNames.forEach((f,h)=>{c.push(`${um(r[h].shape,f)}`)});let l=r.map((f,h)=>aae(f,t.shape,e.variableComponents?e.variableComponents[h]:e.outputComponent,e.dispatchLayout.x.length===t.shape.length)).join(`
`);c.push(l),c.push(e.getUserCode());let m=YB(e);return c.push(jB(m,e)),c.join(`
`)}function ZB(r,t,e){let o=r.shaderKey;if(r.pixelsOpType!=null)return o;let n=[],s=[];t.forEach(c=>{n.push(c.shape),s.push(c.dtype)}),n.push(e.shape),s.push(e.dtype);let a=t.map(c=>w.getBroadcastDims(c.shape,e.shape)),i=t.map(c=>y.arraysEqual(c.shape,e.shape)).join("_"),p=a.map(c=>c.join("_")).join(";"),u=JB(r)?"flatDispatch":"";return o+="_"+(r.workgroupSize?r.workgroupSize.join(","):"")+n.map(c=>c.length).join(",")+s.join(",")+r.variableNames.join(",")+p+i+u,o}var XB=`
2022-11-18 17:13:29 +01:00
struct vec5 {x: i32, y: i32, z: i32, w: i32, u: i32};
struct vec6 {x: i32, y: i32, z: i32, w: i32, u: i32, v: i32};
// Checks whether coordinates lie within the bounds of the shape.
// True when every component of coord lies in [0, shape).
fn coordsInBounds2D(coord : vec2<i32>, shape : vec2<i32>) -> bool {
  let nonNegative = all(coord >= vec2<i32>(0));
  let belowExtent = all(coord < shape);
  return nonNegative && belowExtent;
}
// True when every component of coord lies in [0, shape).
fn coordsInBounds3D(coord : vec3<i32>, shape : vec3<i32>) -> bool {
  let nonNegative = all(coord >= vec3<i32>(0));
  let belowExtent = all(coord < shape);
  return nonNegative && belowExtent;
}
// True when every component of coord lies in [0, shape).
fn coordsInBounds4D(coord : vec4<i32>, shape : vec4<i32>) -> bool {
  let nonNegative = all(coord >= vec4<i32>(0));
  let belowExtent = all(coord < shape);
  return nonNegative && belowExtent;
}
// Rank-1 case: the coordinate is already the flat index; shape is unused.
fn getIndexFromCoords1D(coord : i32, shape : i32) -> i32 {
return coord;
}
// Row-major flat index of a 2D coordinate.
fn getIndexFromCoords2D(coords : vec2<i32>, shape : vec2<i32>) -> i32 {
  let rowStride = shape.y;
  return coords.x * rowStride + coords.y;
}
// Row-major flat index of a 3D coordinate.
fn getIndexFromCoords3D(coords : vec3<i32>, shape : vec3<i32>) -> i32 {
  let strideY = shape.z;
  let strideX = shape.y * shape.z;
  return coords.x * strideX + coords.y * strideY + coords.z;
}
// Row-major flat index of a 4D coordinate.
fn getIndexFromCoords4D(coords : vec4<i32>, shape : vec4<i32>) -> i32 {
  let strideZ = shape.w;
  let strideY = shape.z * shape.w;
  let strideX = shape.y * shape.z * shape.w;
  return coords.x * strideX + coords.y * strideY + coords.z * strideZ + coords.w;
}
// Row-major flat index of a 5D coordinate; strides are built innermost first.
fn getIndexFromCoords5D(coords : vec5, shape : vec5) -> i32 {
  let strideU = 1;
  let strideW = shape.u;
  let strideZ = shape.w * shape.u;
  let strideY = shape.z * shape.w * shape.u;
  let strideX = shape.y * shape.z * shape.w * shape.u;
  return coords.x*strideX + coords.y*strideY + coords.z*strideZ + coords.w*strideW + coords.u*strideU;
}
// Row-major flat index of a 6D coordinate; strides are built innermost first.
fn getIndexFromCoords6D(coords : vec6, shape : vec6) -> i32 {
  let strideV = 1;
  let strideU = shape.v;
  let strideW = shape.u * shape.v;
  let strideZ = shape.w * shape.u * shape.v;
  let strideY = shape.z * shape.w * shape.u * shape.v;
  let strideX = shape.y * shape.z * shape.w * shape.u * shape.v;
  return coords.x*strideX + coords.y*strideY + coords.z*strideZ + coords.w*strideW + coords.u*strideU + coords.v*strideV;
}
// NaN definition in IEEE 754-1985 is:
// - sign = either 0 or 1.
// - biased exponent = all 1 bits.
// - fraction = anything except all 0 bits (since all 0 bits represents infinity).
// https://en.wikipedia.org/wiki/IEEE_754-1985#Representation_of_non-numbers
// True when val is NaN: with the sign bit masked off, a NaN bit pattern is
// strictly greater than the pattern of infinity (0x7f800000).
fn isnan(val: f32) -> bool {
  let magnitudeBits: u32 = bitcast<u32>(val) & 0x7fffffffu;
  return magnitudeBits > 0x7f800000u;
}
// Component-wise NaN test: with the sign bit masked off, a NaN bit pattern is
// strictly greater than the pattern of infinity (0x7f800000).
fn isnanVec4(val : vec4<f32>) -> vec4<bool> {
  let floatToUint: vec4<u32> = bitcast<vec4<u32>>(val);
  return (floatToUint & vec4<u32>(0x7fffffffu)) > vec4<u32>(0x7f800000u);
}
2023-08-05 15:03:11 +02:00
`,oae=`
2022-11-20 22:20:02 +01:00
// True when the magnitude of val equals the INFINITY value supplied through
// the uniforms struct, i.e. for both positive and negative infinity.
fn isinf(val: f32) -> bool {
return abs(val) == uniforms.INFINITY;
}
`;function um(r,t=""){let e=r.length,o=t!==""?`get${t.charAt(0).toUpperCase()+t.slice(1)}CoordsFromIndex`:"getCoordsFromIndex",n=t!==""?`${t.charAt(0).toLowerCase()+t.slice(1)}ShapeStrides`:"outShapeStrides";if(e<=1)return`fn ${o}(index : i32) -> i32 { return index; }`;let s=y.computeStrides(r),a=ft(e),i=[];for(let u=0;u<e;u++)i.push(`d${u}`);if(s.length===1)return` fn ${o}(index : i32) -> vec2<i32> {
2023-05-08 15:12:41 +02:00
let d0 = index / uniforms.${n}; let d1 = index - d0 * uniforms.${n};
2022-11-18 17:13:29 +01:00
return vec2<i32>(d0, d1);
}`;let p;return p="var index2 = index;"+s.map((u,c)=>{let l=`let ${i[c]} = index2 / uniforms.${n}.${Oo(c)}`,m=c===s.length-1?`let ${i[c+1]} = index2 - ${i[c]} * uniforms.${n}.${Oo(c)}`:`index2 = index2 - ${i[c]} * uniforms.${n}.${Oo(c)}`;return`${l}; ${m};`}).join(""),`
2023-05-08 15:12:41 +02:00
fn ${o}(index : i32) -> ${a} {
${p}
return ${a}(${i.join(",")});
2022-11-18 17:13:29 +01:00
}
`}function nae(r,t){let e=r.name,o=r.shape.length,n=ft(o),s="get"+e.charAt(0).toUpperCase()+e.slice(1),a=["d0","d1","d2","d3","d4","d5"].slice(0,o),i=a.map(c=>`${c} : i32`).join(", ");if(o<1)return`
2023-08-05 15:03:11 +02:00
fn ${s}() -> ${Ae(t)} {
return ${Ae(t)}(${e}[0]);
}
`;let p=`uniforms.${e.charAt(0).toLowerCase()+e.slice(1)}Shape`,u=`${o}D`;return o===0&&(u="1D"),`
fn ${s}(${i}) -> ${Ae(t)} {
return ${Ae(t)}(${e}[getIndexFromCoords${u}(${n}(${a.join(",")}),
${p})${t===1?"":` / ${t}`}]);
2022-11-18 17:13:29 +01:00
}
`}function sae(r,t,e,o){let n=r.name,s=n.charAt(0).toUpperCase()+n.slice(1),a="get"+s+"ByOutput",i=r.shape.length,p=t.length,u=ft(p);if(y.arraysEqual(r.shape,t)&&o)return`
2023-08-05 15:03:11 +02:00
fn ${a}Index(globalIndex : i32) -> ${Ae(e)} {
return ${Ae(e)}(${n}[globalIndex]);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
fn ${a}Coords(coords : ${u}) -> ${Ae(e)} {
return ${Ae(e)}(${n}[${p>1?"getOutputIndexFromCoords(coords)":"coords"}${e===1?"":` / ${e}`}]);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`;let c=w.getBroadcastDims(r.shape,t),l=p-i,m="";if(i===0)return`
fn ${a}Index(globalIndex : i32) -> ${Ae(e)}{
2022-11-18 17:13:29 +01:00
return get${s}();
}
2023-08-05 15:03:11 +02:00
fn ${a}Coords(coords : ${u}) -> ${Ae(e)}{
2022-11-18 17:13:29 +01:00
return get${s}();
}
`;p<2&&c.length>=1?m="coords = 0;":m=c.map(g=>`coords.${Oo(g+l)} = 0;`).join(`
`);let d="";if(p<2&&i>0)d="coords";else if(p>1){let g=ft(i),x=r.shape.map((b,C)=>`coords.${Oo(C+l)}`).join(", ");d=`${g}(${x})`}else d="coords";let f=`uniforms.${n.charAt(0).toLowerCase()+n.slice(1)}Shape`,h=`${i}D`;return`
2023-08-05 15:03:11 +02:00
fn ${a}Index(globalIndex : i32) -> ${Ae(e)} {
2022-11-18 17:13:29 +01:00
var coords = getCoordsFromIndex(globalIndex);
${m}
2023-08-05 15:03:11 +02:00
return ${Ae(e)}(${n}[getIndexFromCoords${h}(${d}, ${f})${e===1?"":` / ${e}`}]);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
fn ${a}Coords(coordsIn : ${u}) -> ${Ae(e)} {
2022-11-18 17:13:29 +01:00
var coords = coordsIn;
${m}
2023-08-05 15:03:11 +02:00
return ${Ae(e)}(${n}[getIndexFromCoords${h}(${d}, ${f})${e===1?"":` / ${e}`}]);
2022-11-18 17:13:29 +01:00
}
`}function aae(r,t,e,o){let n=nae(r,e);return r.shape.length<=t.length&&(n+=sae(r,t,e,o)),n}function iae(r,t){let{x:e,y:o=[],z:n=[]}=t,s=r.length,a=e.length+o.length+n.length;if(a!==s)return"";if(e.length===s)return`fn getOutputCoords() -> ${ft(s)}{
2022-11-18 17:13:29 +01:00
let globalIndex = getGlobalIndex();
return getCoordsFromIndex(globalIndex);
}
`;let i="",p=[e,o,n];for(let m=0;m<p.length;m++){let d=p[m];if(d.length!==0)if(d.length===1)i+=`let d${d[0]} = i32(globalId[${m}]);`;else{let f=qB(d,"uniforms.outShape");i+=`var index${m} = i32(globalId[${m}]);`;for(let h=0;h<f.length;h++)i+=`let d${d[h]} = index${m} / ${f[h]};`,h===f.length-1?i+=`let d${d[h+1]} = index${m} - d${d[h]} * ${f[h]};`:i+=`index${m} = index${m} - d${d[h]} * ${f[h]};`}}let u=[];for(let m=0;m<a;m++)u.push(`d${m}`);let c=ft(a),l=`fn getOutputCoords() -> ${c} {
2022-11-18 17:13:29 +01:00
${i}
2023-08-05 15:03:11 +02:00
`;return u.length===0?l+=`return ${c}(0); }`:l+=`return ${c}(${u.join(",")}); }`,l}function uae(r){let t="";switch(r){case 0:case 1:t+=`
2022-11-18 17:13:29 +01:00
// Rank 0/1 output: the coordinate is already the flat index.
fn getOutputIndexFromCoords(coords : i32) -> i32 {
return coords;
}
2023-08-05 15:03:11 +02:00
`;break;case 2:t+=`
2022-11-18 17:13:29 +01:00
// Flat output index of a 2D coordinate; the innermost stride is 1.
fn getOutputIndexFromCoords(coords : vec2<i32>) -> i32 {
  let rowStride = uniforms.outShapeStrides;
  return coords.x * rowStride + coords.y;
}
2023-08-05 15:03:11 +02:00
`;break;case 3:t+=`
2022-11-18 17:13:29 +01:00
// Flat output index of a 3D coordinate; the innermost stride is 1.
fn getOutputIndexFromCoords(coords : vec3<i32>) -> i32 {
  let strides = uniforms.outShapeStrides;
  return coords.x * strides.x + coords.y * strides.y + coords.z;
}
2023-08-05 15:03:11 +02:00
`;break;case 4:t+=`
2022-11-18 17:13:29 +01:00
// Flat output index of a 4D coordinate; the innermost stride is 1.
fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
  let strides = uniforms.outShapeStrides;
  return coords.x * strides.x + coords.y * strides.y + coords.z * strides.z + coords.w;
}
2023-08-05 15:03:11 +02:00
`;break;case 5:t+=`
2022-11-18 17:13:29 +01:00
// Flat output index of a 5D coordinate. The four outer components are scaled
// by the matching outShapeStrides fields; the innermost one (u) has stride 1.
fn getOutputIndexFromCoords(coords : vec5) -> i32 {
return coords.x * uniforms.outShapeStrides.x +
coords.y * uniforms.outShapeStrides.y +
coords.z * uniforms.outShapeStrides.z +
coords.w * uniforms.outShapeStrides.w +
coords.u;
}
2023-08-05 15:03:11 +02:00
`;break;case 6:t+=`
2022-11-18 17:13:29 +01:00
// Flat output index of a 6D coordinate. The five outer components are scaled
// by the matching outShapeStrides fields; the innermost one (v) has stride 1.
fn getOutputIndexFromCoords(coords : vec6) -> i32 {
return coords.x * uniforms.outShapeStrides.x +
coords.y * uniforms.outShapeStrides.y +
coords.z * uniforms.outShapeStrides.z +
coords.w * uniforms.outShapeStrides.w +
coords.u * uniforms.outShapeStrides.u +
coords.v;
}
`;break;default:y.assert(!1,()=>`Unsupported ${r}D shape`);break}return t}function JB(r){return r.dispatch[1]===1&&r.dispatch[2]===1}function Cu(r,t=1){if(r==="float32")return Ae(t,"f32");if(r==="int32"||r==="bool")return Ae(t,"i32");throw new Error(`type ${r} is not supported.`)}function pae(r,t,e){let o=r.length,n=Cu(t,e),s=`fn setOutputAtIndex(flatIndex : i32, value : ${Ae(e)}) {
2022-11-18 17:13:29 +01:00
result[flatIndex] = ${n}(value);
}
2023-05-08 15:12:41 +02:00
2023-08-05 15:03:11 +02:00
fn setOutputAtIndexI32(flatIndex : i32, value : ${Ae(e,"i32")}) {
2022-11-18 17:13:29 +01:00
result[flatIndex] = ${n}(value);
}
`;if(o>=2){let a=["d0","d1","d2","d3","d4","d5"].slice(0,o),i=ft(o);s+=`
2023-08-05 15:03:11 +02:00
fn setOutputAtCoords(${a.map(p=>`${p} : i32`).join(", ")}, value : ${Ae(e)}) {
2022-11-18 17:13:29 +01:00
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
2023-08-05 15:03:11 +02:00
setOutputAtIndex(flatIndex${e===1?"":` / ${e}`}, value);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
fn setOutputAtCoordsI32(${a.map(p=>`${p} : i32`).join(", ")}, value : ${Ae(e,"i32")}) {
2022-11-18 17:13:29 +01:00
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
2023-08-05 15:03:11 +02:00
setOutputAtIndexI32(flatIndex${e===1?"":` / ${e}`}, value);
}
`}return s}function cae(r){let t=/(\w+)\s*:\s*vec(5|6)/g;r=r.replace(t,o=>"@align(16) "+o);let e=/vec(5|6)\s*,\s*(\w+)/g;return r=r.replace(e,(o,n,s)=>`vec${n}, @align(16) ${s}`),r}function YB(r){return!(r.dispatchLayout.hasOwnProperty("y")&&r.dispatchLayout.y.length!==0||r.dispatchLayout.hasOwnProperty("z")&&r.dispatchLayout.z.length!==0)}var Qv={};qe(Qv,{GPUBytesPerElement:()=>qg,MatMulProgramType:()=>Mo,assertNotComplex:()=>mm,computeDispatch:()=>H,computeWorkPerThreadForConv2d:()=>cm,computeWorkgroupInfoForMatMul:()=>Yv,computeWorkgroupSizeForConv2d:()=>pm,flatDispatchLayout:()=>X,isWebGPUSupported:()=>lm,tilesFitEvenlyIntoShape:()=>mae});var kp=r=>{let t=1;for(let e=0;e<r.length;e++)t*=r[e];return t};function mae(r,t){if(r.length!==t.length)throw new Error(`Cannot compute whether rank ${r.length} tiles fit evenly into rank ${t.length} shape - ranks must match.`);return t.every((e,o)=>e%r[o]===0)}function H(r,t,e=[1,1,1],o=[1,1,1]){let[n,s,a]=[Math.ceil(kp(r.x.map(i=>t[i]))/(e[0]*o[0])),r.y?Math.ceil(kp(r.y.map(i=>t[i]))/(e[1]*o[1])):1,r.z?Math.ceil(kp(r.z.map(i=>t[i]))/(e[2]*o[2])):1];return[n,s,a]}function Yv(r,t,e,o=!1){let n=[8,8,1],s=[4,4,1];return o||(r<=8&&(s[1]=1),t<=16&&e<=16&&(n[0]=4)),{workgroupSize:n,elementsPerThread:s}}function pm(r,t,e=!1){if(e)return[8,8,1];let o=kp(r.x.map(s=>t[s])),n=kp(r.y.map(s=>t[s]));return o<=4?[4,16,1]:n<=4?[16,4,1]:[16,16,1]}function cm(r,t,e=!1){if(e)return[4,4,1];let o=kp(r.x.map(s=>t[s])),n=kp(r.y.map(s=>t[s]));return o<=4?[1,2,1]:n<=4?[2,1,1]:[2,2,1]}function X(r){return{x:r.map((t,e)=>e)}}function qg(r){if(r==="float32"||r==="int32"||r==="bool"||r==="string")return 4;if(r==="complex64")return 8;throw new Error(`Unknown dtype ${r}`)}function lm(){return!!(globalThis&&globalThis.navigator&&globalThis.navigator.gpu)}function mm(r,t){Array.isArray(r)||(r=[r]),r.forEach(e=>{e!=null&&y.assert(e.dtype!=="complex64",()=>`${t} does not support complex64 tensors in the WebGPU backend.`)})}var 
Mo;(function(r){r[r.MatMulReduceProgram=0]="MatMulReduceProgram",r[r.MatMulSplitKProgram=1]="MatMulSplitKProgram",r[r.MatMulSmallOutputSizeProgram=2]="MatMulSmallOutputSizeProgram",r[r.MatMulPackedProgram=3]="MatMulPackedProgram",r[r.MatMulMax=4]="MatMulMax"})(Mo||(Mo={}));var dae=A().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"),fae=(r,t)=>{let e=r.limits.maxComputeWorkgroupsPerDimension,o=t.dispatchLayout,n=t.dispatch;if(n.every(a=>a<=e))return n;y.assert(n[0]>e&&o.y===void 0&&o.z===void 0,()=>"Dispatch size exceeds WebGPU limits in Y or Z dimension.");let s=Math.ceil(Math.sqrt(n[0]));return s>e?(s=Math.ceil(Math.cbrt(n[0])),y.assert(s<=e,()=>"Total dispatch size exceeds WebGPU maximum."),[s,s,s]):[s,s,1]},Kc=class r extends so{nextDataId(){return r.nextDataId++}constructor(t,e){if(super(),this.commandQueueOwnedIds=new WeakSet,this.dispatchCountInPass=0,this.disposed=!1,this.downloadWaitMs=0,this.tensorDataPendingDisposal=[],this.queryResolveBuffer=null,this.querySet=null,this.querySetCount=2,this.stagingPendingDisposal=[],this.uniformPendingDisposal=[],this.uploadWaitMs=0,this.hasReadSyncWarned=!1,this.hasTimestampQueryWarned=!1,!lm())throw new Error("WebGPU is not supported on this device");this.pipelineCache={},this.device=t,this.queue=t.queue,this.commandEncoder=null,this.computePassEncoder=null,this.adapterInfo=new Gg(e),this.supportTimestampQuery=this.device.features.has("timestamp-query"),this.thresholdToIncreaseWorkgroups=this.adapterInfo.intelGPUGeneration>=12?16:8,this.bufferManager=new Hg(this.device),this.textureManager=new Kg(this.device),this.tensorMap=new Bo(this,ur()),A().getBool("WEBGPU_USE_PROFILE_TOOL")&&(this.dummyCanvas=document.createElement("canvas"),this.dummyCanvas.width=1,this.dummyCanvas.height=1,this.dummyContext=this.dummyCanvas.getContext("webgpu"),this.dummyContext.configure({device:t,format:"bgra8unorm"}),document.body.appendChild(this.dummyCanvas))}floatPrecision(){return 
32}disposeData(t,e=!1){if(!this.tensorMap.has(t))return!0;let o=this.tensorMap.get(t);return e?o.refCount=0:o.refCount--,o.refCount>0?!1:(o.complexTensorInfos!=null&&(this.disposeData
2023-08-05 15:03:11 +02:00
let zero = sign(a) * 0 + 0;
let one = sign(b) * 0 + 1;
let resultTemp = select(zero, one, a == b);
`,Sae=`
let remainder =
select(a % b, round(a % b), (round(a) == a) & (round(b) == b));
let quotient = (a - remainder) / b;
let resultTemp =
round(select(quotient, quotient - 1, sign(remainder) == -sign(b)));
`,Iae=`
let zero = sign(a) * 0 + 0;
let one = sign(b) * 0 + 1;
let resultTemp = select(zero, one, a > b);
`,vae=`
let zero = sign(a) * 0 + 0;
let one = sign(b) * 0 + 1;
let resultTemp = select(zero, one, a >= b);
`,kae=`
let zero = sign(a) * 0 + 0;
let one = sign(b) * 0 + 1;
let resultTemp = select(zero, one, a < b);
`,Nae=`
let zero = sign(a) * 0 + 0;
let one = sign(b) * 0 + 1;
let resultTemp = select(zero, one, a <= b);
`,Tae="return f32(a >= 1.0 && b >= 1.0);",_ae=`return (vec4<f32>(a >= vec4<f32>(1.0)) *
vec4<f32>(b >= vec4<f32>(1.0)));`,$ae="return f32(a >= 1.0 || b >= 1.0);",Eae=`return min(vec4<f32>(a >= vec4<f32>(1.0)) +
vec4<f32>(b >= vec4<f32>(1.0)), vec4<f32>(1.0));`,Rae="let resultTemp = max(a, b);",Dae="let resultTemp = min(a, b);",Aae=`
2023-05-08 15:12:41 +02:00
let isNaN = b == 0.;
2022-11-20 22:20:02 +01:00
var resultTemp = a % b;
2023-05-08 15:12:41 +02:00
resultTemp = select((resultTemp + b) % b, resultTemp,
(a < 0. && b < 0.) || (a >= 0. && b > 0.));
2023-08-05 15:03:11 +02:00
`,Fae=`
2023-01-06 19:23:06 +01:00
let isNaN = !vec4<bool>(b);
2022-11-20 22:20:02 +01:00
var resultTemp = vec4<f32>(a % b);
if (!((a[0] < 0. && b[0] < 0.) || (a[0] >= 0. && b[0] > 0.))) {
resultTemp[0] = (resultTemp[0] + b[0]) % b[0];
}
if (!((a[1] < 0. && b[1] < 0.) || (a[1] >= 0. && b[1] > 0.))) {
resultTemp[1] = (resultTemp[1] + b[1]) % b[1];
}
if (!((a[2] < 0. && b[2] < 0.) || (a[2] >= 0. && b[2] > 0.))) {
resultTemp[2] = (resultTemp[2] + b[2]) % b[2];
}
if (!((a[3] < 0. && b[3] < 0.) || (a[3] >= 0. && b[3] > 0.))) {
resultTemp[3] = (resultTemp[3] + b[3]) % b[3];
}
2023-08-05 15:03:11 +02:00
`,Pae="let resultTemp = a * b;",Oae=`
2023-05-08 15:12:41 +02:00
var resultTemp = f32(a != b);
let valueForNaN = 1.0;
2023-08-05 15:03:11 +02:00
`,Mae=`
2022-11-18 17:13:29 +01:00
var resultTemp = vec4<f32>(a != b);
let valueForNaN = 1.0;
2023-08-05 15:03:11 +02:00
`,Lae=`
2023-05-08 15:12:41 +02:00
let isNaN = a < 0.0 && floor(b) < b;
2022-11-18 17:13:29 +01:00
if (b == 0.0) {
return 1.0;
}
2023-05-08 15:12:41 +02:00
var resultTemp = select(sign(a) * pow(abs(a), b), pow(abs(a), b),
round(abs(b) % 2.0) != 1.0);
2023-08-05 15:03:11 +02:00
`,Bae=`
2022-11-18 17:13:29 +01:00
let isModRound1Bool = vec4<i32>(round(abs(b) % vec4<f32>(2.0))) == vec4<i32>(1);
let isModRound1 = vec4<f32>(isModRound1Bool);
let multiplier = sign(a) * isModRound1 + (vec4<f32>(1.0) - isModRound1);
var resultTemp = multiplier * pow(abs(a), b);
// Ensure that a^0 = 1, including 0^0 = 1 as this correspond to TF and JS
let isExpZero = b == vec4<f32>(0.0);
if (isExpZero.r) {
resultTemp.r = 1.0;
}
if (isExpZero.g) {
resultTemp.g = 1.0;
}
if (isExpZero.b) {
resultTemp.b = 1.0;
}
if (isExpZero.a) {
resultTemp.a = 1.0;
}
let isNaN = (a < vec4<f32>(0.0)) & (floor(b) < b);
2023-08-05 15:03:11 +02:00
`,zae="if (a < 0.0) { return b * a; } return a;",Vae=`
2022-11-18 17:13:29 +01:00
let aLessThanZero = vec4<f32>(a < vec4<f32>(0.0));
return (aLessThanZero * (b * a)) + ((vec4<f32>(1.0) - aLessThanZero) * a);
`,Wae="let resultTemp = (a - b) * (a - b);",Uae="let resultTemp = a - b;";function qc(r,t){let e;do{switch(r){case fe.ATAN2:e=gae;break;case fe.MAX:e=Rae;break;case fe.MIN:e=Dae;break;case fe.MOD:e=t?Fae:Aae;break;case fe.NOT_EQUAL:e=t?Mae:Oae;break;case fe.POW:e=t?Bae:Lae;break;default:continue}let o,n,s;return t?(o="isnanVec4",n="vec4<f32>",s="vec4<bool>"):(o="isnan",n="f32",s="bool"),`
let aIsNaN = ${o}(a);
let aPostLegalization = select(a, ${n}(42), aIsNaN);
let bIsNaN = ${o}(b);
let bPostLegalization = select(b, ${n}(42), bIsNaN);
2023-05-08 15:12:41 +02:00
let isNaN = false;
let valueForNaN = uniforms.NAN;
{
let a = aPostLegalization;
let b = bPostLegalization;
2023-08-05 15:03:11 +02:00
${e}
return select(
resultTemp, ${n}(valueForNaN),
${s}(isNaN) | aIsNaN | bIsNaN);
}
2023-08-05 15:03:11 +02:00
`}while(!1);switch(r){case fe.ADD:e=hae;break;case fe.COMPLEX_MULTIPLY_IMAG:e=yae;break;case fe.COMPLEX_MULTIPLY_REAL:e=xae;break;case fe.DIV:e=bae;break;case fe.ELU_DER:e=Cae;break;case fe.EQUAL:e=wae;break;case fe.FLOOR_DIV:e=Sae;break;case fe.GREATER:e=Iae;break;case fe.GREATER_EQUAL:e=vae;break;case fe.LESS:e=kae;break;case fe.LESS_EQUAL:e=Nae;break;case fe.LOGICAL_AND:return t?_ae:Tae;case fe.LOGICAL_OR:return t?Eae:$ae;case fe.MUL:e=Pae;break;case fe.PRELU:return t?Vae:zae;case fe.SQUARED_DIFFERENCE:e=Wae;break;case fe.SUB:e=Uae;break;default:}return`
${e}
return resultTemp;
2023-08-05 15:03:11 +02:00
`}var Z;(function(r){r[r.ABS=0]="ABS",r[r.ACOS=1]="ACOS",r[r.ACOSH=2]="ACOSH",r[r.ASIN=3]="ASIN",r[r.ASINH=4]="ASINH",r[r.ATAN=5]="ATAN",r[r.ATANH=6]="ATANH",r[r.CEIL=7]="CEIL",r[r.COS=8]="COS",r[r.COSH=9]="COSH",r[r.ELU=10]="ELU",r[r.ERF=11]="ERF",r[r.EXP=12]="EXP",r[r.EXPM1=13]="EXPM1",r[r.FLOOR=14]="FLOOR",r[r.IS_FINITE=15]="IS_FINITE",r[r.IS_INF=16]="IS_INF",r[r.IS_NAN=17]="IS_NAN",r[r.LINEAR=18]="LINEAR",r[r.LOG=19]="LOG",r[r.LOG1P=20]="LOG1P",r[r.LOGICAL_NOT=21]="LOGICAL_NOT",r[r.NEG=22]="NEG",r[r.RELU=23]="RELU",r[r.RELU6=24]="RELU6",r[r.LEAKYRELU=25]="LEAKYRELU",r[r.RECIPROCAL=26]="RECIPROCAL",r[r.ROUND=27]="ROUND",r[r.RSQRT=28]="RSQRT",r[r.SELU=29]="SELU",r[r.SIGMOID=30]="SIGMOID",r[r.SIGN=31]="SIGN",r[r.SIN=32]="SIN",r[r.SINH=33]="SINH",r[r.SOFTPLUS=34]="SOFTPLUS",r[r.SQRT=35]="SQRT",r[r.SQUARE=36]="SQUARE",r[r.STEP=37]="STEP",r[r.TAN=38]="TAN",r[r.TANH=39]="TANH",r[r.TO_INT=40]="TO_INT"})(Z||(Z={}));var Gae="return abs(a);",Hae=`
2022-11-20 22:20:02 +01:00
if (abs(a) > 1.) {
return uniforms.NAN;
}
return acos(a);
2023-08-05 15:03:11 +02:00
`,Kae=`
2022-11-20 22:20:02 +01:00
if (a < 1.) {
return uniforms.NAN;
}
return acosh(a);
2023-08-05 15:03:11 +02:00
`,qae=`
2022-11-20 22:20:02 +01:00
if (abs(a) > 1.) {
return uniforms.NAN;
}
return asin(a);
2023-08-05 15:03:11 +02:00
`,jae="return asinh(a);",Xae=`
2022-11-20 22:20:02 +01:00
if (isnan(a)) {
return uniforms.NAN;
}
return atan(a);
2023-08-05 15:03:11 +02:00
`,Yae=`
2022-11-20 22:20:02 +01:00
if (abs(a) > 1.) {
return uniforms.NAN;
}
if (a == 1.) {
return uniforms.INFINITY;
}
if (a == -1.) {
return -uniforms.INFINITY;
}
return atanh(a);
2023-08-05 15:03:11 +02:00
`,Qae="return ceil(a);",Zae="return cos(a);",Jae=`
2022-11-18 17:13:29 +01:00
let e2x = exp(-a);
return (e2x + 1.0 / e2x) / 2.0;
2023-08-05 15:03:11 +02:00
`,eie="return exp(a) - 1.0;",tie="if (a >= 0.0) { return a; } return (exp(a) - 1.0);",rie=`
2022-11-18 17:13:29 +01:00
var resFloat = exp(a) - vec4<f32>(1.0);
if (a.r >= 0.0) {
resFloat.r = a.r;
}
if (a.g >= 0.0) {
resFloat.g = a.g;
}
if (a.b >= 0.0) {
resFloat.b = a.b;
}
if (a.a >= 0.0) {
resFloat.a = a.a;
}
return resFloat;
2023-08-05 15:03:11 +02:00
`,oie=`
2022-11-20 22:20:02 +01:00
// Error function is calculated approximately with elementary function.
// See "Handbook of Mathematical Functions with Formulas,
// Graphs, and Mathematical Tables", Abramowitz and Stegun.
let p = ${w.ERF_P};
let a1 = ${w.ERF_A1};
let a2 = ${w.ERF_A2};
let a3 = ${w.ERF_A3};
let a4 = ${w.ERF_A4};
let a5 = ${w.ERF_A5};
2022-11-20 22:20:02 +01:00
let sign = sign(a);
let absA = abs(a);
let t = 1.0 / (1.0 + p * absA);
return sign * (1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-absA * absA));
2023-08-05 15:03:11 +02:00
`,nie="return exp(a);",sie="return floor(a);",aie="return f32(!isnan(a) && !isinf(a));",iie="return f32(isinf(a));",uie="return f32(isnan(a));",pie="return a;",cie=`if (a < 0.0) { return uniforms.NAN; }
return log(a);`,lie=`
2022-11-20 22:20:02 +01:00
if (isnan(a)) { return a; }
return log(1.0 + a);
2023-08-05 15:03:11 +02:00
`,mie="return f32(!(a >= 1.0));",die="return -a;",fie="if (a < 0.0) { return uniforms.alpha * a; } return a;",hie=`
2022-11-18 17:13:29 +01:00
let aLessThanZero = vec4<f32>(a < vec4<f32>(0.0));
return (aLessThanZero * (uniforms.alpha * a)) + ((vec4<f32>(1.0) - aLessThanZero) * a);
2023-08-05 15:03:11 +02:00
`,gie="return 1.0 / a;",xie="return select(a, 0.0, a < 0.0);",yie="return clamp(a, 0.0, 6.0);",bie="return clamp(a, vec4<f32>(0.0, 0.0, 0.0, 0.0), vec4<f32>(6.0, 6.0, 6.0, 6.0));",Cie=`
2022-11-18 17:13:29 +01:00
return select(a, vec4<f32>(0.0), a < vec4<f32>(0.0));
2023-08-05 15:03:11 +02:00
`,wie="return round(a);",Sie="return inverseSqrt(a);",Iie=`
2023-01-06 19:23:06 +01:00
if (a >= 0.0) {
return ${w.SELU_SCALE} * a;
2023-01-06 19:23:06 +01:00
} else {
return ${w.SELU_SCALEALPHA} * (exp(a) - 1.0);
2023-01-06 19:23:06 +01:00
}
2023-08-05 15:03:11 +02:00
`,vie="return 1.0 / (1.0 + exp(-1.0 * a));",kie="return sign(a);",Nie="return sin(a);",Tie=`
2022-11-18 17:13:29 +01:00
let e2x = exp(a);
return (e2x - 1.0 / e2x) / 2.0;
2023-08-05 15:03:11 +02:00
`,_ie=`
2023-01-06 19:23:06 +01:00
let epsilon = 1.1920928955078125e-7;
let threshold = log(epsilon) + 2.0;
let too_large = a > -threshold;
let too_small = a < threshold;
let exp_a = exp(a);
if (too_large) {
return a;
} else if (too_small) {
return exp_a;
} else {
return log(exp_a + 1.0);
}
2023-08-05 15:03:11 +02:00
`,$ie="return sqrt(a);",Eie="return a * a;",Rie=`
2023-01-06 19:23:06 +01:00
if (isnan(a)) {
return a;
}
return select(uniforms.stepAlpha, 1.0, a > 0.0);
2023-08-05 15:03:11 +02:00
`,Die="return tan(a);",Aie=`
2022-11-18 17:13:29 +01:00
let e2x = exp(-2.0 * abs(a));
return sign(a) * (1.0 - e2x) / (1.0 + e2x);
`,Fie="return f32(i32((a)));";function wi(r,t){switch(r){case Z.ABS:return Gae;case Z.ACOS:return Hae;case Z.ACOSH:return Kae;case Z.ASIN:return qae;case Z.ASINH:return jae;case Z.ATAN:return Xae;case Z.ATANH:return Yae;case Z.COS:return Zae;case Z.COSH:return Jae;case Z.CEIL:return Qae;case Z.ELU:return t?rie:tie;case Z.ERF:return oie;case Z.EXP:return nie;case Z.EXPM1:return eie;case Z.FLOOR:return sie;case Z.IS_FINITE:return aie;case Z.IS_INF:return iie;case Z.IS_NAN:return uie;case Z.LINEAR:return pie;case Z.LOG:return cie;case Z.LOG1P:return lie;case Z.LOGICAL_NOT:return mie;case Z.NEG:return die;case Z.LEAKYRELU:return t?hie:fie;case Z.RECIPROCAL:return gie;case Z.RELU:return t?Cie:xie;case Z.RELU6:return t?bie:yie;case Z.ROUND:return wie;case Z.RSQRT:return Sie;case Z.SELU:return Iie;case Z.SIGMOID:return vie;case Z.SIGN:return kie;case Z.SIN:return Nie;case Z.SINH:return Tie;case Z.SOFTPLUS:return _ie;case Z.SQRT:return $ie;case Z.SQUARE:return Eie;case Z.STEP:return Rie;case Z.TAN:return Die;case Z.TANH:return Aie;case Z.TO_INT:return Fie;default:throw new Error(`BinaryType ${r} is not implemented!`)}}function dr(r,t=!1,e=!1,o=3){if(r===null)return"";let n="";if(r==="linear")n=wi(Z.LINEAR);else if(r==="relu")n=wi(Z.RELU,e);else if(r==="elu")n=wi(Z.ELU,e);else if(r==="relu6")n=wi(Z.RELU6,e);else if(r==="prelu")n=qc(fe.PRELU,e);else if(r==="sigmoid")n=wi(Z.SIGMOID,e);else if(r==="leakyrelu")n=wi(Z.LEAKYRELU,e);else throw new Error(`Activation ${r} has not been implemented for the WebGPU backend.`);let a=Ae(e?4:1),i="";return t?i=`
2022-11-18 17:13:29 +01:00
fn activation(a : ${a}, coords : vec${o}<i32>) -> ${a} {
let b = getPreluActivationWeightsByOutputCoords(coords);
${n}
}`:i=`
fn activation(a : ${a}, coords : vec${o}<i32>) -> ${a} {
${n}
}`,i}function Qr(r,t){return`
2022-11-18 17:13:29 +01:00
${r?"value = value + getBiasByOutputCoords(coords);":""}
2023-08-05 15:03:11 +02:00
${t?"value = activation(value, coords);":""}
`}function Zv(r,t,e=!1,o=!1,n=!1,s=1){y.assert(r&&s===1||!r,()=>`transposeA ${r} is not compatible with component size ${s}`);let a=`
2023-01-06 19:23:06 +01:00
${r?"value = getA(batch, col, row);":"value = getA(batch, row, col);"}
2023-08-05 15:03:11 +02:00
`,i=t?"value = getB(batch, col, row);":"value = getB(batch, row, col);";return`
fn mm_readA(batch: i32, row: i32, col: i32) -> ${Ae(s)} {
2023-05-08 15:12:41 +02:00
var value = ${Ae(s)}(0.0);
2023-08-05 15:03:11 +02:00
${e&&n?a:`
2023-01-06 19:23:06 +01:00
${r?"if(row < uniforms.dimAOuter && col < uniforms.dimInner)":"if(row < uniforms.aShape[1] && col < uniforms.aShape[2])"}
2022-11-18 17:13:29 +01:00
{
2023-01-06 19:23:06 +01:00
${a}
2022-11-18 17:13:29 +01:00
}
`}
return value;
}
2023-08-05 15:03:11 +02:00
fn mm_readB(batch: i32, row: i32, col: i32) -> ${Ae(s)} {
2023-05-08 15:12:41 +02:00
var value = ${Ae(s)}(0.0);
2023-01-06 19:23:06 +01:00
${i}
2022-11-18 17:13:29 +01:00
return value;
}
`}function dm(r,t,e,o,n=!1,s=!1,a=!1,i=1){return`
${Zv(e,o,n,s,a,i)}
2023-08-05 15:03:11 +02:00
fn mm_write(batch: i32, row: i32, col: i32, valueIn: ${Ae(i)}) {
2023-01-06 19:23:06 +01:00
${n&&s?"":"if (row < uniforms.dimAOuter && col < uniforms.dimBOuter)"}
2022-11-18 17:13:29 +01:00
{
var value = valueIn;
let coords = vec3<i32>(batch, row, col);
${Qr(r,t)}
2022-11-18 17:13:29 +01:00
setOutputAtCoords(coords[0], coords[1], coords[2], value);
}
}
2023-08-05 15:03:11 +02:00
`}var Pie=(r,t)=>r?`
2023-01-06 19:23:06 +01:00
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
2023-08-05 15:03:11 +02:00
globalRowStart + inputCol * ${t});
2022-11-18 17:13:29 +01:00
`:`
2023-01-06 19:23:06 +01:00
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
globalRow + innerRow,
2023-08-05 15:03:11 +02:00
kStart + inputCol * ${t});
`,Oie=(r,t,e,o)=>{if(r)return`
for (var k = 0; k < ${o}; k++) {
let BCached0 = mm_Bsub[k][tileCol];
let ACached0 = mm_Asub[k][localRow];
for (var i = 0; i < ${e}; i++) {
2023-05-08 15:12:41 +02:00
acc[i] = fma(BCached0, vec4<f32>(ACached0[i]), acc[i]);
2023-08-05 15:03:11 +02:00
}
}`;{let n="",s="";for(let a=0;a<t;a++)n+=`let BCached${a} = mm_Bsub[k * ${t} + ${a}][tileCol];`,s+=`acc[i] = fma(BCached${a}, vec4<f32>(ACached[${a}]), acc[i]);`;return`
for (var k = 0; k < ${o/t}; k++) {
${n}
for (var i = 0; i < ${e}; i++) {
2022-11-18 17:13:29 +01:00
let ACached = mm_Asub[tileRow + i][k];
2023-08-05 15:03:11 +02:00
${s}
}
}`}};function Np(r,t,e=!1,o=32,n=!1,s=32,a=!1){let i=t[1]*r[1],p=t[0]*r[0],u=e?i:o,c=e?o:i,l=u/t[0],m=o/t[1],d=r[1],f=r[0];return y.assert((e&&l===4&&r[1]===4||!e&&(l===3||l===4))&&u%t[0]===0&&o%t[1]===0&&r[0]===4,()=>`If transposeA ${e} is true, innerElementSize ${l} and workPerThread[1] ${r[1]} must be 4.
2023-05-08 15:12:41 +02:00
Otherwise, innerElementSize ${l} must be 3 or 4.
2023-08-05 15:03:11 +02:00
tileAWidth ${u} must be divisible by workgroupSize[0]${t[0]}. tileInner ${o} must be divisible by workgroupSize[1] ${t[1]}. colPerThread ${r[0]} must be 4.`),`
2023-05-08 15:12:41 +02:00
var<workgroup> mm_Asub : array<array<vec${l}<f32>, ${u/l}>, ${c}>;
var<workgroup> mm_Bsub : array<array<vec4<f32>, ${p/r[0]}>, ${o}>;
2023-08-05 15:03:11 +02:00
${G()} {
2022-11-18 17:13:29 +01:00
let localRow = i32(localId.y);
2023-05-08 15:12:41 +02:00
let tileRow = localRow * ${d};
2022-11-18 17:13:29 +01:00
let tileCol = i32(localId.x);
2023-05-08 15:12:41 +02:00
let globalRow = i32(globalId.y) * ${d};
2023-08-05 15:03:11 +02:00
let globalCol = i32(globalId.x) * ${f};
2022-11-18 17:13:29 +01:00
let batch = ${n?"0":"i32(globalId.z)"};
2023-05-08 15:12:41 +02:00
let batchA = ${n||!a?"batch":"batch % uniforms.aShape[0]"};
let batchB = ${n||!a?"batch":"batch % uniforms.bShape[0]"};
let globalRowStart = i32(workgroupId.y) * ${i};
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
let numTiles = ${n?`${Math.ceil(s/o)}`:`(uniforms.dimInner - 1) / ${o} + 1`};
2022-11-18 17:13:29 +01:00
var kStart = ${n?`i32(globalId.z) * ${s}`:"0"};
2023-05-08 15:12:41 +02:00
var acc: array<vec4<f32>, ${d}>;
2022-11-18 17:13:29 +01:00
// Loop over shared dimension.
2023-05-08 15:12:41 +02:00
let tileRowB = localRow * ${m};
2023-01-06 19:23:06 +01:00
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-05-08 15:12:41 +02:00
for (var innerRow = 0; innerRow < ${d}; innerRow++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRow + innerRow;
let inputCol = tileCol;
2023-08-05 15:03:11 +02:00
${Pie(e,l)}
2022-11-18 17:13:29 +01:00
}
// Load one tile of B into local memory.
2023-05-08 15:12:41 +02:00
for (var innerRow = 0; innerRow < ${m}; innerRow++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRowB + innerRow;
let inputCol = tileCol;
2023-01-06 19:23:06 +01:00
mm_Bsub[inputRow][inputCol] = mm_readB(batchB, kStart + inputRow, globalCol);
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
kStart = kStart + ${o};
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-08-05 15:03:11 +02:00
${Oie(e,l,d,o)}
2022-11-18 17:13:29 +01:00
workgroupBarrier();
}
2023-05-08 15:12:41 +02:00
for (var innerRow = 0; innerRow < ${d}; innerRow++) {
2022-11-18 17:13:29 +01:00
mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]);
}
}`}var ez=r=>r?`
2023-01-06 19:23:06 +01:00
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
globalRowStart + inputCol);
`:`
2023-01-06 19:23:06 +01:00
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
globalRowStart + inputRow,
kStart + inputCol);
`,Mie=r=>r?"let ACached = mm_Asub[k][tileRow + innerRow];":"let ACached = mm_Asub[tileRow + innerRow][k];";function Tp(r,t,e=!1,o=32,n=!1,s=32,a=!1,i=!1){let p=r[1]*t[1],u=r[0]*t[0],c=e?p:o,l=e?o:p;y.assert(l%t[1]===0&&c%t[0]===0&&o%t[1]===0,()=>`tileAHight ${l} must be divisible by workgroupSize[1]${t[1]}, tileAWidth ${c} must be divisible by workgroupSize[0]${t[0]}, tileInner ${o} must be divisible by workgroupSize[1]${t[1]}`);let m=l/t[1],d=c/t[0],f=o/t[1],h=r[1],g=r[0],x=a?`
2022-11-18 17:13:29 +01:00
let localRow = i32(localId.y);
let localCol = i32(localId.x);
2023-01-06 19:23:06 +01:00
let globalRowStart = i32(workgroupId.y) * ${p};
let globalColStart = i32(workgroupId.x) * ${u};
2022-11-18 17:13:29 +01:00
// Loop over shared dimension.
2023-01-06 19:23:06 +01:00
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-08-05 15:03:11 +02:00
for (var inputRow = localRow; inputRow < ${l}; inputRow = inputRow + ${t[1]}) {
for (var inputCol = localCol; inputCol < ${c}; inputCol = inputCol + ${t[0]}) {
${ez(e)}
2022-11-18 17:13:29 +01:00
}
}
// Load one tile of B into local memory.
2023-08-05 15:03:11 +02:00
for (var inputRow = localRow; inputRow < ${o}; inputRow = inputRow + ${t[1]}) {
for (var inputCol = localCol; inputCol < ${u}; inputCol = inputCol + ${t[0]}) {
2023-01-06 19:23:06 +01:00
mm_Bsub[inputRow][inputCol] = mm_readB(batchB,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
globalColStart + inputCol);
}
}
2023-01-06 19:23:06 +01:00
kStart = kStart + ${o};
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-01-06 19:23:06 +01:00
var BCached : array<f32, ${g}>;
for (var k = 0; k < ${o}; k++) {
for (var inner = 0; inner < ${g}; inner++) {
2023-08-05 15:03:11 +02:00
BCached[inner] = mm_Bsub[k][localCol + inner * ${t[0]}];
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
2023-08-05 15:03:11 +02:00
let ACached = ${e?`mm_Asub[k][localRow + innerRow * ${t[1]}];`:`mm_Asub[localRow + innerRow * ${t[1]}][k];`}
2023-01-06 19:23:06 +01:00
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2023-05-08 15:12:41 +02:00
acc[innerRow][innerCol] =
fma(ACached, BCached[innerCol], acc[innerRow][innerCol]);
2022-11-18 17:13:29 +01:00
}
}
}
workgroupBarrier();
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
2023-08-05 15:03:11 +02:00
let gRow = globalRowStart + localRow + innerRow * ${t[1]};
2023-01-06 19:23:06 +01:00
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2023-08-05 15:03:11 +02:00
let gCol = globalColStart + localCol + innerCol * ${t[0]};
2022-11-18 17:13:29 +01:00
mm_write(batch, gRow, gCol, acc[innerRow][innerCol]);
}
}
`:`
2023-01-06 19:23:06 +01:00
let tileRow = i32(localId.y) * ${h};
let tileCol = i32(localId.x) * ${g};
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
let globalRow = i32(globalId.y) * ${h};
let globalCol = i32(globalId.x) * ${g};
let globalRowStart = i32(workgroupId.y) * ${p};
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
let tileRowA = i32(localId.y) * ${m};
let tileColA = i32(localId.x) * ${d};
let tileRowB = i32(localId.y) * ${f};
2022-11-18 17:13:29 +01:00
// Loop over shared dimension.
2023-01-06 19:23:06 +01:00
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${m}; innerRow++) {
for (var innerCol = 0; innerCol < ${d}; innerCol++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRowA + innerRow;
let inputCol = tileColA + innerCol;
${ez(e)}
2022-11-18 17:13:29 +01:00
}
}
// Load one tile of B into local memory.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${f}; innerRow++) {
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRowB + innerRow;
let inputCol = tileCol + innerCol;
2023-01-06 19:23:06 +01:00
mm_Bsub[inputRow][inputCol] = mm_readB(batchB,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
globalCol + innerCol);
}
}
2023-01-06 19:23:06 +01:00
kStart = kStart + ${o};
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-01-06 19:23:06 +01:00
var BCached : array<f32, ${g}>;
for (var k = 0; k < ${o}; k++) {
for (var inner = 0; inner < ${g}; inner++) {
2022-11-18 17:13:29 +01:00
BCached[inner] = mm_Bsub[k][tileCol + inner];
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
2023-08-05 15:03:11 +02:00
${Mie(e)}
2023-01-06 19:23:06 +01:00
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2023-05-08 15:12:41 +02:00
acc[innerRow][innerCol] =
fma(ACached, BCached[innerCol], acc[innerRow][innerCol]);
2022-11-18 17:13:29 +01:00
}
}
}
workgroupBarrier();
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
mm_write(batch, globalRow + innerRow, globalCol + innerCol,
acc[innerRow][innerCol]);
}
}
`;return`
2023-01-06 19:23:06 +01:00
var<workgroup> mm_Asub : array<array<f32, ${c}>, ${l}>;
var<workgroup> mm_Bsub : array<array<f32, ${u}>, ${o}>;
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
${G()} {
2022-11-18 17:13:29 +01:00
let batch = ${n?"0":"i32(globalId.z)"};
2023-01-06 19:23:06 +01:00
let batchA = ${n||!i?"batch":"batch % uniforms.aShape[0]"};
let batchB = ${n||!i?"batch":"batch % uniforms.bShape[0]"};
let numTiles = ${n?`${Math.ceil(s/o)}`:`(uniforms.dimInner - 1) / ${o} + 1`};
2022-11-18 17:13:29 +01:00
var kStart = ${n?`i32(globalId.z) * ${s}`:"0"};
2023-01-06 19:23:06 +01:00
var acc : array<array<f32, ${g}>, ${h}>;
2022-11-18 17:13:29 +01:00
// Without this initialization strange values show up in acc.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
acc[innerRow][innerCol] = 0.0;
}
}
2023-01-06 19:23:06 +01:00
${x}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
`}var Lie=r=>r?`
2023-01-06 19:23:06 +01:00
mm_readA(batchA, colA, globalRow),
mm_readA(batchA, colA + 1, globalRow),
mm_readA(batchA, colA + 2, globalRow),
mm_readA(batchA, colA + 3, globalRow)
2022-11-18 17:13:29 +01:00
`:`
2023-01-06 19:23:06 +01:00
mm_readA(batchA, globalRow, colA),
mm_readA(batchA, globalRow, colA + 1),
mm_readA(batchA, globalRow, colA + 2),
mm_readA(batchA, globalRow, colA + 3)
2023-08-05 15:03:11 +02:00
`;function Bie(r,t=!1){y.assert(r[1]===1&&r[2]===1,()=>`A linear work group size is required. But got ${r}.`);let e=r[0]*4;return`
2022-11-18 17:13:29 +01:00
var<workgroup> mm_Asub : array<vec4<f32>, ${r[0]}>;
2023-08-05 15:03:11 +02:00
${G()} {
2022-11-18 17:13:29 +01:00
let tileCol = i32(localId.x);
let globalCol = i32(globalId.x);
let globalRow = i32(globalId.y);
2023-08-05 15:03:11 +02:00
let numTiles = (uniforms.dimInner - 1) / ${e} + 1;
2022-11-18 17:13:29 +01:00
let batch = i32(globalId.z);
2023-01-06 19:23:06 +01:00
let batchA = batch % uniforms.aShape[0];
let batchB = batch % uniforms.bShape[0];
2022-11-18 17:13:29 +01:00
// Without this initialization strange values show up in acc.
var acc = 0.0;
// Loop over shared dimension.
2023-01-06 19:23:06 +01:00
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-08-05 15:03:11 +02:00
let colA = t * ${e} + tileCol * 4;
mm_Asub[tileCol] = vec4<f32>(${Lie(t)});
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-08-05 15:03:11 +02:00
for (var k = 0; k < ${e/4}; k++) {
let rowB = t * ${e} + k * 4;
2023-01-06 19:23:06 +01:00
let BCached = vec4<f32>(mm_readB(batchB, rowB, globalCol),
mm_readB(batchB, rowB + 1, globalCol),
mm_readB(batchB, rowB + 2, globalCol),
mm_readB(batchB, rowB + 3, globalCol));
2022-11-18 17:13:29 +01:00
let ACached = mm_Asub[k];
acc = acc + dot(ACached, BCached);
}
workgroupBarrier();
}
mm_write(batch, globalRow, globalCol, acc);
}
`}var jg=class{constructor(t,e,o=!1,n=!1,s=null,a=null,i=null,p=!1){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=e,this.dispatchLayout={x:[2],y:[1],z:[0]};let u=o?t[1]:t[2];if(this.isVec4=(u%4===0&&!o||e[1]%4===0&&o)&&e[2]%4===0&&!n,this.outputComponent=this.isVec4?4:1,this.isVectorA=e[1]===1&&!o,!this.isVec4&&this.isVectorA)this.elementsPerThread=[1,1,1],this.workgroupSize=[32,1,1];else{let m=Yv(e[1],u,e[2],o);this.workgroupSize=m.workgroupSize,this.elementsPerThread=m.elementsPerThread}this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,this.elementsPerThread);let c=s!=null,l=i!=null;c&&this.variableNames.push("bias"),l&&this.variableNames.push("preluActivationWeights"),this.sequentialAccessByThreads=p,this.transposeA=o,this.transposeB=n,this.addBias=c,this.activation=a,this.hasPreluActivationWeights=l,[this.fitAOuter,this.fitBOuter,this.fitInner]=this.getShapeFit(e[1],e[2],u),this.shaderKey=`matMulPacked_${this.elementsPerThread}_${o}_${n}_${this.activation}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.isVectorA}_${this.sequentialAccessByThreads}`}getShapeFit(t,e,o){let n=this.workgroupSize[1]*this.elementsPerThread[1],s=this.workgroupSize[0]*this.elementsPerThread[0];!this.isVec4&&this.isVectorA?this.tileInner=this.workgroupSize[0]*4:this.tileInner=s;let a=t%n===0,i=e%s===0,p=o%this.tileInner===0;return[a,i,p]}getUserCode(){return`
${dr(this.activation,this.hasPreluActivationWeights,this.isVec4)}
${dm(this.addBias,this.activation,!1,this.transposeB,this.fitAOuter,this.fitBOuter,this.fitInner,this.isVec4?4:1)}
${this.isVec4?Np(this.elementsPerThread,this.workgroupSize,this.transposeA,this.tileInner,!1,null,!0):this.isVectorA?Bie(this.workgroupSize,this.transposeA):Tp(this.elementsPerThread,this.workgroupSize,this.transposeA,this.tileInner,!1,null,this.sequentialAccessByThreads,!0)}
2023-08-05 15:03:11 +02:00
`}};function zie(r){return`
2023-01-06 19:23:06 +01:00
var<workgroup> sumValues : array<f32, ${r}>;
2023-08-05 15:03:11 +02:00
${G()} {
2022-11-18 17:13:29 +01:00
let coords = getOutputCoords();
let batch = coords[0];
2023-01-06 19:23:06 +01:00
let batchA = batch % uniforms.aShape[0];
let batchB = batch % uniforms.bShape[0];
2022-11-18 17:13:29 +01:00
let row = coords[1];
let col = coords[2];
var sum = 0.0;
let Length = uniforms.dimInner;
2023-01-06 19:23:06 +01:00
for (var k = i32(localId.x); k < Length; k = k + ${r}) {
let dataA = mm_readA(batchA, row, k);
let dataB = mm_readB(batchB, k, col);
2022-11-18 17:13:29 +01:00
sum = sum + dataA * dataB;
}
sumValues[localId.x] = sum;
workgroupBarrier();
2023-01-06 19:23:06 +01:00
for(var currentSize = ${r/2}u; currentSize > 1u;
2022-11-18 17:13:29 +01:00
currentSize = currentSize / 2u) {
if (localId.x < currentSize)
{
sumValues[localId.x] = sumValues[localId.x] + sumValues[localId.x + currentSize];
}
workgroupBarrier();
}
if (localId.x == 0u) {
sum = sumValues[0] + sumValues[1];
mm_write(batch, row, col, sum);
}
}
`}var Xg=class{constructor(t,e=!1,o=!1,n=null,s=null,a=null){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workgroupSize=[256,1,1],this.outputShape=t,this.dispatchLayout={x:[],y:[1,2],z:[0]},this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize);let i=n!=null,p=a!=null;i&&this.variableNames.push("bias"),p&&this.variableNames.push("preluActivationWeights"),this.transposeA=e,this.transposeB=o,this.addBias=i,this.activation=s,this.hasPreluActivationWeights=p,this.shaderKey=`matMulReduce_${this.activation}_${e}_${o}`}getUserCode(){return`
${dr(this.activation,this.hasPreluActivationWeights)}
${dm(this.addBias,this.activation,this.transposeA,this.transposeB)}
2023-08-05 15:03:11 +02:00
${zie(this.workgroupSize[0])}
`}};function Vie(r){let t=r[1],e=r[0],o=t>e?t:e;return`
var<workgroup> mm_Asub : array<array<f32, ${o}>, ${t}>;
var<workgroup> mm_Bsub : array<array<f32, ${e}>, ${o}>;
2022-11-18 17:13:29 +01:00
// If the output size is small for matrix multiplication, avoid to use vec4
// and handle some elements per thread to optimally utilize the ALU.
// Read data from global memory to registers firstly, then store them into
// shared memory, so it is instruction-Level parallelism for arithmetic
// operations and others handle IO operations between barrier api, makes ALU
// and load/store units work simultaneously, could improves the performance.
2023-08-05 15:03:11 +02:00
${G()} {
2022-11-18 17:13:29 +01:00
let tileRow = i32(localId.y);
let tileCol = i32(localId.x);
let globalRow = i32(globalId.y);
let globalCol = i32(globalId.x);
let batch = i32(globalId.z);
2023-01-06 19:23:06 +01:00
let batchA = batch % uniforms.aShape[0];
let batchB = batch % uniforms.bShape[0];
2022-11-18 17:13:29 +01:00
// uniforms.dimInner should be greater than 0.
let numTiles = (uniforms.dimInner - 1) / ${o} + 1;
var acc = 0.0;
var globalColA = tileCol;
var globalRowB = 0;
2023-01-06 19:23:06 +01:00
var regA = mm_readA(batchA, globalRow, globalColA);
var regB0 = mm_readB(batchB, globalRowB + 2 * tileRow, globalCol);
var regB1 = mm_readB(batchB, globalRowB + 2 * tileRow + 1, globalCol);
2022-11-18 17:13:29 +01:00
globalColA = globalColA + ${o};
globalRowB = globalRowB + ${o};
for (var t = 0; t < numTiles; t = t + 1) {
mm_Asub[tileRow][tileCol] = regA;
mm_Bsub[2 * tileRow][tileCol] = regB0;
mm_Bsub[2 * tileRow + 1][tileCol] = regB1;
workgroupBarrier();
2023-01-06 19:23:06 +01:00
regA = mm_readA(batchA, globalRow, globalColA);
regB0 = mm_readB(batchB, globalRowB + 2 * tileRow, globalCol);
regB1 = mm_readB(batchB, globalRowB + 2 * tileRow + 1, globalCol);
2022-11-18 17:13:29 +01:00
globalColA = globalColA + ${o};
globalRowB = globalRowB + ${o};
for (var k = 0; k < ${o}; k = k + 1) {
acc = acc + mm_Asub[tileRow][k] * mm_Bsub[k][tileCol];
}
workgroupBarrier();
}
mm_write(batch, globalRow, globalCol, acc);
}
`}var Yg=class{constructor(t,e,o,n=!1,s=!1,a=null,i=null,p=null){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workgroupSize=[16,8,1],this.outputShape=o,this.dispatchLayout={x:[2],y:[1],z:[0]},this.dispatch=[Math.ceil(o[2]/this.workgroupSize[0]),Math.ceil(o[1]/this.workgroupSize[1]),o[0]];let u=a!=null;u&&this.variableNames.push("bias");let c=p!=null;c&&this.variableNames.push("preluActivationWeights"),this.transposeA=n,this.transposeB=s,this.addBias=u,this.activation=i,this.hasPreluActivationWeights=c,this.shaderKey=`matMulSmallOutputSize_${this.activation}_${n}_${s}`}getUserCode(){return`
${dr(this.activation,this.hasPreluActivationWeights)}
${dm(this.addBias,this.activation,this.transposeA,this.transposeB)}
2023-08-05 15:03:11 +02:00
${Vie(this.workgroupSize)}
`}};var Qg=class{constructor(t,e,o=!1,n=!1){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workgroupSize=[8,8,1],this.atomic=!0,this.splitedDimInner=128,y.assert(t[0]===1,()=>"MatMulSplitKProgram only supports batch = 1."),this.outputShape=t,this.dispatchLayout={x:[2],y:[1],z:[0,3]};let s=(o&&this.outputShape[1]%4===0||!o&&e%4===0)&&this.outputShape[2]%4===0;this.elementsPerThread=[4,4,this.splitedDimInner],this.outputComponent=s?4:1,s||(this.outputShape[1]<16&&(this.elementsPerThread[1]=1),this.outputShape[2]<16&&(this.elementsPerThread[0]=1)),this.dispatch=H(this.dispatchLayout,[this.outputShape[0],this.outputShape[1],this.outputShape[2],e],this.workgroupSize,this.elementsPerThread),this.transposeA=o,this.transposeB=n,this.shaderKey=`matMulSplitK_${o}_${n}_${this.elementsPerThread}_${this.outputComponent}`}getUserCode(){let t=this.outputComponent;return`
${Zv(!1,this.transposeB,!1,!1,!1,t)}
2023-08-05 15:03:11 +02:00
fn mm_write(batch: i32, row : i32, col : i32, value : ${Ae(t)}) {
2022-11-18 17:13:29 +01:00
if (row < uniforms.dimAOuter && col < uniforms.dimBOuter) {
let coords = vec3<i32>(batch, row, col);
let flatIndex = getOutputIndexFromCoords(coords);
// The problem is that we should initialize output to zero before using.
// Otherwise, the original value will be added to the result.
2023-08-05 15:03:11 +02:00
for (var i = 0; i < ${t}; i = i + 1) {
${Yr("&result[flatIndex + i]",`${t>1?"value[i]":"value"}`,"float32")}
2023-01-06 19:23:06 +01:00
}
2022-11-18 17:13:29 +01:00
}
}
${t===4?Np(this.elementsPerThread,this.workgroupSize,this.transposeA,32,!0,this.splitedDimInner):Tp(this.elementsPerThread,this.workgroupSize,this.transposeA,32,!0,this.splitedDimInner)}
`}},Zg=class{constructor(t,e=null,o=null,n=null){this.uniforms="",this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.addBias=e!=null,this.hasPreluActivationWeights=n!=null,this.activation=o,this.addBias&&this.variableNames.push("bias"),this.hasPreluActivationWeights&&this.variableNames.push("preluActivationWeights"),this.shaderKey=`biasActivation_${o}`}getUserCode(){return`
${dr(this.activation,this.hasPreluActivationWeights)}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
var value = getXByOutputIndex(index);
${Qr(this.addBias,this.activation)}
2022-11-18 17:13:29 +01:00
setOutputAtIndex(index, value);
}
}
`}};var Jg=class{constructor(t){this.variableNames=[],this.outputShape=[],this.uniforms="value : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="fill"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
setOutputAtIndex(index, uniforms.value);
}
}
`}};function vt(r){let{backend:t,attrs:e}=r,{shape:o,value:n}=e,{dtype:s}=e;if(s=s||y.inferDtype(n),s==="string"){let a=y.getArrayFromDType(s,y.sizeFromShape(o));return a.fill(n),t.makeTensorInfo(o,s,a)}else{let a=new Jg(o),i=[{type:"float32",data:[n]}];return t.runWebGPUProgram(a,[],s,i)}}var tz={kernelName:sa,backendName:"webgpu",kernelFunc:vt};function pe(r){let{inputs:t,attrs:e}=r,{x:o}=t,{shape:n}=e,s=y.sizeFromShape(o.shape),a=y.inferFromImplicitShape(n,s),i=y.sizeFromShape(a);return y.assert(s===i,()=>`The new shape (${a}) has ${i} elements and the old shape (${o.shape}) has ${s} elements. The new shape and old shape must have the same number of elements.`),r.backend.incRef(o.dataId),{dataId:o.dataId,shape:a,dtype:o.dtype}}var rz={kernelName:da,backendName:"webgpu",kernelFunc:pe};function _p({a:r,b:t,transposeA:e,transposeB:o,backend:n,bias:s=null,preluActivationWeights:a=null,leakyreluAlpha:i=0,activation:p=null}){let u=r.shape.length,c=t.shape.length,l=e?r.shape[u-2]:r.shape[u-1],m=o?t.shape[c-1]:t.shape[c-2],d=e?r.shape[u-1]:r.shape[u-2],f=o?t.shape[c-2]:t.shape[c-1],h=r.shape.slice(0,-2),g=t.shape.slice(0,-2),x=y.sizeFromShape(h),b=y.sizeFromShape(g),S=Sr.assertAndGetBroadcastShape(r.shape.slice(0,-2),t.shape.slice(0,-2)).concat([d,f]);y.assert(l===m,()=>`Error in matMul: inner shapes (${l}) and (${m}) of Tensors with shapes ${r.shape} and ${t.shape} and transposeA=${e} and transposeB=${o} must match.`);let k=e?[x,l,d]:[x,d,l],_=o?[b,f,m]:[b,m,f],E=pe({inputs:{x:r},backend:n,attrs:{shape:k}}),R=pe({inputs:{x:t},backend:n,attrs:{shape:_}}),D=[E,R],P=Math.max(x,b),O=[E,R],M=[{type:"int32",data:[d]},{type:"int32",data:[f]},{type:"int32",data:[l]}],L,B,z=[P,d,f],U=A().get("WEBGPU_MATMUL_PROGRAM_TYPE");if(U<0){let 
q=A().getNumber("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL"),Y=q>0?q:n.thresholdToIncreaseWorkgroups,J=P*Math.ceil(d/32)*Math.ceil(f/32);J<=Y||d<=8&&J<=Y*2?P*d*f<=128?U=Mo.MatMulReduceProgram:P===1&&m>=2e3?U=Mo.MatMulSplitKProgram:U=Mo.MatMulSmallOutputSizeProgram:U=Mo.MatMulPackedProgram}switch(U){case Mo.MatMulReduceProgram:L=new Xg(z,e,o,s,p,a);break;case Mo.MatMulSplitKProgram:{if(B=vt({backend:n,attrs:{shape:z,value:0,dtype:r.dtype}}),L=new Qg(z,m,e,o),s||p){B=n.runWebGPUProgram(L,O,r.dtype,M,B);let Y=new Zg(B.shape,s,p,a),J=null,re=[B];s&&re.push(s),a&&re.push(a),p==="leakyrelu"&&(J=[{type:"float32",data:[i]}],Y.uniforms+=" alpha : f32,");let ne=n.runWebGPUProgram(Y,re,B.dtype,J);D.push(B);let ee=pe({inputs:{x:ne},backend:n,attrs:{shape:S}});D.push(ne);for(let oe of D)n.disposeData(oe.dataId);return ee}break}case Mo.MatMulSmallOutputSizeProgram:L=new Yg(k,_,z,e,o,s,p,a);break;case Mo.MatMulPackedProgram:let q=n.adapterInfo.isIntel();L=new jg(k,z,e,o,s,p,a,q);break;default:throw new Error(`Unsupported MatMulProgramType ${U}.`)}s&&O.push(s),a&&O.push(a),p==="leakyrelu"&&(M.push({type:"float32",data:[i]}),L.uniforms+=" alpha : f32,"),B=n.runWebGPUProgram(L,O,r.dtype,M,B);let j=pe({inputs:{x:B},backend:n,attrs:{shape:S}});D.push(B);for(let q of D)n.disposeData(q.dataId);return j}function Wie(r){let{inputs:t,backend:e,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=t,{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o;return _p({a:n,b:s,transposeA:p,transposeB:u,backend:e,bias:a,preluActivationWeights:i,leakyreluAlpha:l,activation:c})}var oz={kernelName:So,backendName:"webgpu",kernelFunc:Wie};var 
fm=class{constructor(t,e,o){this.variableNames=["AReal","AImag","BReal","BImag"],this.workgroupSize=[128,1,1],this.size=!0,this.outputShape=w.assertAndGetBroadcastShape(e,o),this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey=`binaryOpComplex_${t}`,this.op=t}getUserCode(){return`
2022-11-18 17:13:29 +01:00
fn binaryOpComplex(
areal : f32, aimag : f32, breal : f32, bimag : f32) -> f32 {
${qc(this.op,!1)}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if(index < uniforms.size) {
let areal = getARealByOutputIndex(index);
let aimag = getAImagByOutputIndex(index);
let breal = getBRealByOutputIndex(index);
let bimag = getBImagByOutputIndex(index);
setOutputAtIndex(index, binaryOpComplex(areal, aimag, breal, bimag));
}
}
`}};var Si=class{constructor(t,e,o){if(this.size=!0,this.variableNames=["A","B"],this.outputShape=w.assertAndGetBroadcastShape(e,o),this.dispatchLayout=X(this.outputShape),this.op=t,this.useSharedMemoryWithA=e.length<=1&&o.length>1&&e[0]<128,this.useSharedMemoryWithB=o.length<=1&&e.length>1&&o[0]<128,this.useSharedMemoryWithA||this.useSharedMemoryWithB)this.outputComponent=1,this.variableComponents=[1,1],this.lastDimensionSize=this.useSharedMemoryWithB?o[0]:e[0],this.shaderKey=`binary_${t}_${this.lastDimensionSize}`,this.type="shared",this.workgroupSize=[256,1,1];else{let n=e.length>0&&e[e.length-1]%4===0,s=o.length>0&&o[o.length-1]%4===0;n&&s?(this.outputComponent=4,this.variableComponents=[4,4]):n&&(y.isScalarShape(o)||o[o.length-1]===1)||s&&(y.isScalarShape(e)||e[e.length-1]===1)?(this.outputComponent=4,this.variableComponents=n?[4,1]:[1,4]):(this.outputComponent=1,this.variableComponents=[1,1]),this.type="nonshared",this.shaderKey=`binary_${t}_${this.variableComponents}`,this.workgroupSize=[128,1,1]}this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.outputComponent,1,1])}getUserCode(){let t,e=this.outputComponent===4?"vec4<f32>":"f32",o=`
2023-08-05 15:03:11 +02:00
fn binaryOperation(a : ${e}, b : ${e}) -> ${e} {
${qc(this.op,this.outputComponent===4)}
2022-11-18 17:13:29 +01:00
};
`;if(this.type==="shared"){let n=this.lastDimensionSize>1?`coords[${this.outputShape.length-1}]`:"0",s=this.useSharedMemoryWithB?`let a = getAByOutputIndex(index);
let b = sharedBuf[${n}];`:`let a = sharedBuf[${n}];
2023-08-05 15:03:11 +02:00
let b = getBByOutputIndex(index);`;t=`
2022-11-18 17:13:29 +01:00
${o}
var<workgroup> sharedBuf : array<f32, ${this.lastDimensionSize}>;
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
// Fill in the shared memory buffer.
let localIndex = i32(localId.x);
if(localIndex < ${this.lastDimensionSize}) {
sharedBuf[localIndex] = f32(${this.useSharedMemoryWithB?"B":"A"}[localIndex]);
}
workgroupBarrier();
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
${s}
setOutputAtIndex(index, binaryOperation(a, b));
}
}
2023-08-05 15:03:11 +02:00
`}else t=`
2022-11-18 17:13:29 +01:00
${o}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
2023-05-08 15:12:41 +02:00
let coords = getCoordsFromIndex(index * ${this.outputComponent});
2023-08-05 15:03:11 +02:00
let a = ${e}(getAByOutputCoords(coords));
let b = ${e}(getBByOutputCoords(coords));
2022-11-18 17:13:29 +01:00
setOutputAtIndex(index, binaryOperation(a, b));
}
}
`;return t}};function At(r){let{inputs:t}=r,{x:e}=t;return r.backend.incRef(e.dataId),{dataId:e.dataId,shape:e.shape,dtype:e.dtype}}var nz={kernelName:Co,backendName:"webgpu",kernelFunc:At};function xo(r){let{inputs:t,backend:e}=r,{real:o,imag:n}=t,s=e.makeTensorInfo(o.shape,"complex64"),a=e.tensorMap.get(s.dataId),i=At({inputs:{x:o},backend:e}),p=At({inputs:{x:n},backend:e});return a.complexTensorInfos={real:i,imag:p},s}var sz={kernelName:Ri,backendName:"webgpu",kernelFunc:xo};var Zr=class{constructor(t,e,o=""){this.variableNames=["A"],this.size=!0;let n=128;this.workgroupSize=[n,1,1],this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.op=e,o!==""&&(this.uniforms=o),this.shaderKey=`unary_${e}`}getUserCode(){return`
2022-11-18 17:13:29 +01:00
fn unaryOperation(a : f32) -> f32 {
${wi(this.op,!1)}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let a = getAByOutputIndex(index);
setOutputAtIndex(index, unaryOperation(a));
}
}
`}};function ye({opType:r,cpuKernelImpl:t,dtype:e}){return({inputs:o,backend:n})=>{let{x:s}=o,a=n,i=e||s.dtype;if(a.shouldExecuteOnCPU([s])&&t!=null){let u=a.tensorMap.get(s.dataId),c=t(u.values,i);return a.makeTensorInfo(s.shape,i,c)}let p=new Zr(s.shape,r);return a.runWebGPUProgram(p,[s],i)}}function et({opType:r,cpuKernelImpl:t,supportsComplex:e=!1,dtype:o}){return({inputs:n,backend:s})=>{let{a,b:i}=n,p=s;if(e&&a.dtype==="complex64"){let l=p.tensorMap.get(a.dataId),m=p.tensorMap.get(i.dataId),d,f;if(r!==fe.MUL)[d,f]=[[l.complexTensorInfos.real,m.complexTensorInfos.real],[l.complexTensorInfos.imag,m.complexTensorInfos.imag]].map(g=>{let[x,b]=g,C={dataId:x.dataId,dtype:x.dtype,shape:a.shape},S={dataId:b.dataId,dtype:b.dtype,shape:i.shape},k=new Si(r,a.shape,i.shape);return p.runWebGPUProgram(k,[C,S],dt(x.dtype,b.dtype))});else{let g=new fm(fe.COMPLEX_MULTIPLY_REAL,a.shape,i.shape),x=new fm(fe.COMPLEX_MULTIPLY_IMAG,a.shape,i.shape),b=[{dataId:l.complexTensorInfos.real.dataId,dtype:l.complexTensorInfos.real.dtype,shape:a.shape},{dataId:l.complexTensorInfos.imag.dataId,dtype:l.complexTensorInfos.imag.dtype,shape:a.shape},{dataId:m.complexTensorInfos.real.dataId,dtype:m.complexTensorInfos.real.dtype,shape:i.shape},{dataId:m.complexTensorInfos.imag.dataId,dtype:m.complexTensorInfos.imag.dtype,shape:i.shape}];d=p.runWebGPUProgram(g,b,"float32"),f=p.runWebGPUProgram(x,b,"float32")}let h=xo({inputs:{real:d,imag:f},backend:p});return p.disposeData(d.dataId),p.disposeData(f.dataId),h}let u=o||dt(a.dtype,i.dtype);if((a.dtype==="string"||i.dtype==="string"||p.shouldExecuteOnCPU([a,i]))&&t!=null){let l=p.tensorMap.get(a.dataId).values,m=p.tensorMap.get(i.dataId).values,d=a.dtype==="string"?w.fromUint8ToStringArray(l):l,f=a.dtype==="string"?w.fromUint8ToStringArray(m):m,[h,g]=t(a.shape,i.shape,d,f,u);return p.makeTensorInfo(g,u,h)}let c=new Si(r,a.shape,i.shape);return 
p.runWebGPUProgram(c,[a,i],u)}}var{addImpl:az,castImpl:iz,ceilImpl:uz,concatImpl:pz,equalImpl:cz,expImpl:lz,expm1Impl:mz,floorImpl:dz,floorDivImpl:fz,gatherNdImpl:hz,gatherV2Impl:gz,greaterEqualImpl:xz,greaterImpl:yz,lessEqualImpl:bz,lessImpl:Cz,logImpl:wz,maxImpl:Sz,maximumImpl:Iz,minimumImpl:vz,multiplyImpl:kz,negImpl:Nz,notEqualImpl:Tz,prodImpl:_z,rangeImpl:$z,rsqrtImpl:Ez,scatterImpl:Rz,simpleAbsImpl:Dz,sliceImpl:Az,stridedSliceImpl:Fz,stringNGramsImpl:Pz,subImpl:Oz,tileImpl:Mz,topKImpl:Lz,transposeImpl:Bz,uniqueImpl:KPt}=wc;var Uie=ye({opType:Z.ABS,cpuKernelImpl:Dz}),zz={kernelName:Xs,backendName:"webgpu",kernelFunc:Uie};var Gie=ye({opType:Z.ACOS}),Vz={kernelName:Vo,backendName:"webgpu",kernelFunc:Gie};var Hie=ye({opType:Z.ACOSH}),Wz={kernelName:Wo,backendName:"webgpu",kernelFunc:Hie};var Kie=et({opType:fe.ADD,cpuKernelImpl:az,supportsComplex:!0}),Uz={kernelName:io,backendName:"webgpu",kernelFunc:Kie};var ex=class{constructor(t){this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t[0],this.variableNames=t.map((e,o)=>`T${o}`),this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.shaderKey="addN"}getUserCode(){let t=[];this.variableNames.forEach(n=>{t.push(`let v${n} = get${n}ByOutputCoords(coords);`)});let e=this.variableNames.map(n=>`v${n}`).join(" + ");return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
for (var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if (flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
2023-08-05 15:03:11 +02:00
${t.join(`
2022-11-18 17:13:29 +01:00
`)}
2023-08-05 15:03:11 +02:00
setOutputAtIndex(flatIndex, ${e});
2022-11-18 17:13:29 +01:00
}
}
}
`}};function qie(r){let{inputs:t,backend:e}=r,o=t;if(o.length===1)return At({inputs:{x:o[0]},backend:e});let n=o.map(i=>i.dtype).reduce((i,p)=>dt(i,p)),s=o.map(i=>i.shape),a=new ex(s);return e.runWebGPUProgram(a,o,n)}var Gz={kernelName:Uo,backendName:"webgpu",kernelFunc:qie};var tx=class{constructor(t,e){this.variableNames=["A"],this.workgroupSize=[16,16,1];let o=new Array(t.length);for(let n=0;n<o.length;n++)o[n]=t[e[n]];this.outputShape=o,this.dispatchLayout={x:[0],y:[1]},this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[1,1,1]),this.shaderKey="transposeShared"}getUserCode(){y.assert(this.workgroupSize[0]===this.workgroupSize[1],()=>`Must be a square tile, current tile shape is ${this.workgroupSize[0]} x ${this.workgroupSize[1]}`);let t=this.workgroupSize[0];return`
2022-11-20 22:20:02 +01:00
var<workgroup> tile : array<array<f32, ${this.workgroupSize[0]+1}>, ${this.workgroupSize[0]}>;
2023-08-05 15:03:11 +02:00
${G()} {
var x = i32(workgroupId.x) * ${t} + i32(localId.x);
var y = i32(workgroupId.y) * ${t} + i32(localId.y);
2022-11-20 22:20:02 +01:00
let width = uniforms.outShape[0];
let height = uniforms.outShape[1];
if (x < width && y < height) {
tile[localId.y][localId.x] = f32(A[y * width + x]);
}
workgroupBarrier();
2023-08-05 15:03:11 +02:00
x = i32(workgroupId.y) * ${t} + i32(localId.x);
y = i32(workgroupId.x) * ${t} + i32(localId.y);
2022-11-20 22:20:02 +01:00
if (x < height && y < width) {
setOutputAtIndex((y * height + x), tile[localId.x]
[localId.y]);
}
}
`}};var rx=class{constructor(t,e){this.variableNames=["A"],this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0;let o=new Array(t.length);for(let n=0;n<o.length;n++)o[n]=t[e[n]];this.outputShape=o,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.newDim=e,this.shaderKey=`transpose_${e}`}getUserCode(){let t=ft(this.outputShape.length),e=Jv(this.newDim);return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-20 22:20:02 +01:00
for(var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if(flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
2022-11-20 22:20:02 +01:00
setOutputAtIndex(flatIndex, A[getIndexFromCoords${this.outputShape.length}D(
2023-08-05 15:03:11 +02:00
${t}(${e}), uniforms.aShape)]);
2022-11-20 22:20:02 +01:00
}
}
}
`}};function Jv(r){let t=r.length;if(t>6)throw Error(`Transpose for rank ${t} is not yet supported`);let e=new Array(t);for(let o=0;o<r.length;o++)e[r[o]]=`coords.${Oo(o)}`;return e.join()}function xr(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{perm:s}=o,a=e,i=n.shape.length,p=new Array(i);for(let c=0;c<p.length;c++)p[c]=n.shape[s[c]];if(e.shouldExecuteOnCPU([n])){let l=a.tensorMap.get(n.dataId).values,m=Bz(l,n.shape,n.dtype,s,p);return e.makeTensorInfo(p,n.dtype,m)}if(n.shape.length===2&&y.arraysEqual(s,[1,0])){let c=new tx(n.shape,s);return a.runWebGPUProgram(c,[n],n.dtype)}let u=new rx(n.shape,s);return a.runWebGPUProgram(u,[n],n.dtype)}var Hz={kernelName:po,backendName:"webgpu",kernelFunc:xr};var ox=class{constructor(t,e,o){this.variableNames=["x"],this.uniforms="reduceSize : i32,",this.size=!0,this.inputShape=[t.batchSize,t.inSize];let[n]=w.computeOutAndReduceShapes(this.inputShape,[1]);this.outputShape=n.length===0?[1]:n,t.inSize>=32768&&o>=512?this.workgroupSize=[512,1,1]:t.inSize>=4096?this.workgroupSize=[256,1,1]:this.workgroupSize=[64,1,1],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,[1,1,1]),this.reduceType=e,this.shaderKey=`reduce_${e}`}getUserCode(){let t="",e="0.0",o=this.workgroupSize[0];this.reduceType==="min"||this.reduceType==="max"?(t=`
2022-11-20 22:20:02 +01:00
if (isnan(candidate)) {
bestValue = uniforms.NAN;
} else if (!isnan(bestValue) && candidate ${this.reduceType==="min"?"<":">"} bestValue)
2023-08-05 15:03:11 +02:00
{ bestValue = candidate; }`,e="f32(x[offset])"):this.reduceType==="sum"||this.reduceType==="mean"?t=" bestValue = bestValue + candidate; ":this.reduceType==="prod"?(t=" bestValue = bestValue * candidate; ",e="1.0"):this.reduceType==="all"?(t=" bestValue = f32(bestValue >= 1.0 && candidate >= 1.0); ",e="1.0"):this.reduceType==="any"&&(t=" bestValue = f32(bestValue >= 1.0 || candidate >= 1.0); ",e="0.0");let n=this.reduceType==="mean"?"setOutputAtIndex(outputIndex, bestValue / f32(uniforms.reduceSize));":"setOutputAtIndex(outputIndex, bestValue);";return`
2022-11-20 22:20:02 +01:00
fn DIV_CEIL(a : u32, b : u32) -> u32 {
return ((a - 1u) / b + 1u);
}
${`
2023-01-06 19:23:06 +01:00
var<workgroup> xBestValues : array<f32, ${o}>;
2022-11-20 22:20:02 +01:00
`}
fn getOffset(outputIndex : i32) -> i32 {
let outputCoords = getCoordsFromIndex(outputIndex);
let offset = ${this.outputShape.length===1?"outputCoords":"outputCoords[0]"} * uniforms.reduceSize;
return offset;
}
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-01-06 19:23:06 +01:00
let outputIndex = index / ${o};
2022-11-20 22:20:02 +01:00
let offset = getOffset(outputIndex);
2023-08-05 15:03:11 +02:00
var bestValue = ${e};
2022-11-20 22:20:02 +01:00
let Length = uniforms.reduceSize;
2023-01-06 19:23:06 +01:00
let WorkPerThread = DIV_CEIL(u32(Length), ${o}u);
2022-11-20 22:20:02 +01:00
for (var k = i32(localId.x); k < Length && outputIndex < uniforms.size;
2023-01-06 19:23:06 +01:00
k = k + ${o}) {
2022-11-20 22:20:02 +01:00
let candidate = f32(x[offset + k]);
2023-08-05 15:03:11 +02:00
${t}
2022-11-20 22:20:02 +01:00
}
xBestValues[localId.x] = bestValue;
workgroupBarrier();
2023-01-06 19:23:06 +01:00
var reduceSize = min(u32(Length), ${o}u);
2022-11-20 22:20:02 +01:00
for (var currentSize = reduceSize / 2u; reduceSize > 1u;
currentSize = reduceSize / 2u) {
let interval = DIV_CEIL(reduceSize, 2u);
if (localId.x < currentSize) {
let candidate = xBestValues[localId.x + interval];
2023-08-05 15:03:11 +02:00
${t}
2022-11-20 22:20:02 +01:00
xBestValues[localId.x] = bestValue;
}
reduceSize = interval;
workgroupBarrier();
}
if (localId.x == 0u && outputIndex < uniforms.size) {
2023-01-06 19:23:06 +01:00
${n}
2022-11-20 22:20:02 +01:00
}
}
`}};var jie={mean:"float32",all:"bool",any:"bool"};function Jr(r,t,e,o,n){let s=r.shape.length,a=[],i=y.parseAxisParam(t,r.shape),p=i,u=w.getAxesPermutation(p,s),c=r;u!=null&&(c=xr({inputs:{x:r},attrs:{perm:u},backend:n}),p=w.getInnerMostAxes(p.length,s),a.push(c)),w.assertAxesAreInnerMostDims(o,p,s);let[l,m]=w.computeOutAndReduceShapes(c.shape,p),d=l;e&&(d=w.expandShapeToKeepDim(l,i));let f;if((o==="max"||o==="prod")&&n.shouldExecuteOnCPU([c])){let h=n.tensorMap.get(c.dataId).values;switch(o){case"max":let g=Sz(h,y.sizeFromShape(m),d,r.dtype);f=n.makeTensorInfo(d,r.dtype,g);break;case"prod":let{outVals:x,outShape:b,outDtype:C}=_z(c.shape,c.dtype,h,p);f=n.makeTensorInfo(b,C,x);break;default:throw new Error(`${o} CPU implementation is not yet supported.`)}}else{let h=y.sizeFromShape(m),x=y.sizeFromShape(c.shape)/h,b={windowSize:h,inSize:h,batchSize:x,outSize:1},C=jie[o]||oi(r.dtype),S=[{type:"int32",data:[h]}],k=new ox(b,o,n.device.limits.maxComputeWorkgroupSizeX),_=n.runWebGPUProgram(k,[c],C,S);a.push(_),f=pe({inputs:{x:_},attrs:{shape:d},backend:n})}return a.forEach(h=>n.disposeData(h.dataId)),f}function Xie(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{keepDims:s,axis:a}=o;return Jr(n,a,s,"all",e)}var Kz={kernelName:Go,backendName:"webgpu",kernelFunc:Xie};function Yie(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{keepDims:s,axis:a}=o;return Jr(n,a,s,"any",e)}var qz={kernelName:Ho,backendName:"webgpu",kernelFunc:Yie};var jc=class{constructor(t,e,o){this.workgroupSize=[64,1,1],this.variableNames=["x"],this.uniforms="infinityValue : f32,",this.size=!0;let 
n=[e];this.op=o==="min"?"<":">";let[s,a]=w.computeOutAndReduceShapes(t,n);this.outputShape=s.length===0?[1]:s,this.dispatchLayout=X(this.outputShape),y.sizeFromShape(a)<32?(this.type="plain",this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize)):(this.type="shared",this.dispatch=H(this.dispatchLayout,this.outputShape,[1,1,1])),this.inputShape=t,this.shaderKey=`argMinMax_${this.op}_${this.type}`}getUserCode(){let t=this.workgroupSize[0],e=()=>this.inputShape.length===1?"uniforms.xShape":`uniforms.xShape.${Oo(this.inputShape.length-1)}`,o=()=>{let n="";if(this.outputShape.length===1)this.inputShape.length!==1&&(n+="outputCoords,");else for(let s=0;s<this.outputShape.length;s++)n+=`outputCoords.${Oo(s)},`;return n};return this.type==="shared"?`
2022-11-18 17:13:29 +01:00
fn DIV_CEIL(a : u32, b : u32) -> u32 {
return ((a - 1u) / b + 1u);
}
${`
2023-08-05 15:03:11 +02:00
var<workgroup> xBestIndices : array<i32, ${t}>;
var<workgroup> xBestValues : array<f32, ${t}>;
2022-11-18 17:13:29 +01:00
`}
2023-08-05 15:03:11 +02:00
${G("index")} {
let outputIndex = index / ${t};
let reduceLength = ${e()};
2022-11-18 17:13:29 +01:00
var bestIndex = i32(localId.x);
var bestValue = uniforms.infinityValue;
let outputCoords = getCoordsFromIndex(outputIndex);
for (var k = i32(localId.x); k < reduceLength && outputIndex < uniforms.size;
2023-08-05 15:03:11 +02:00
k = k + ${t}) {
2023-01-06 19:23:06 +01:00
let candidate = getX(${o()} k);
2022-11-18 17:13:29 +01:00
if (!isnan(candidate) && candidate ${this.op} bestValue) {
bestValue = candidate;
bestIndex = k;
}
}
xBestValues[localId.x] = bestValue;
xBestIndices[localId.x] = bestIndex;
workgroupBarrier();
2023-08-05 15:03:11 +02:00
var reduceSize = min(u32(reduceLength), ${t}u);
2022-11-18 17:13:29 +01:00
for (var currentSize = reduceSize / 2u; reduceSize > 1u;
currentSize = reduceSize / 2u) {
let interval = DIV_CEIL(reduceSize, 2u);
if (localId.x < currentSize) {
let candidate = xBestValues[localId.x + interval];
if (candidate ${this.op} bestValue) {
bestValue = candidate;
xBestValues[localId.x] = bestValue;
xBestIndices[localId.x] = xBestIndices[localId.x + interval];
}
}
reduceSize = interval;
workgroupBarrier();
}
if (localId.x == 0u && outputIndex < uniforms.size) {
setOutputAtIndexI32(outputIndex, xBestIndices[localId.x]);
}
}
`:`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let outputCoords = getCoordsFromIndex(index);
var bestIndex = 0;
2023-01-06 19:23:06 +01:00
var bestValue = getX(${o()} 0);
2023-08-05 15:03:11 +02:00
let reduceLength = ${e()};
2022-11-18 17:13:29 +01:00
for (var i = 1; i < reduceLength; i++) {
2023-01-06 19:23:06 +01:00
let candidate = getX(${o()} i);
2022-11-18 17:13:29 +01:00
if (candidate ${this.op} bestValue) {
bestValue = candidate;
bestIndex = i;
}
}
setOutputAtIndexI32(index, bestIndex);
}
}
`}};function Qie(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=w.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=xr({inputs:{x:n},backend:e,attrs:{perm:i}}),u.push(p),a=w.getInnerMostAxes(a.length,p.shape.length)),w.assertAxesAreInnerMostDims("argMax",[a[0]],p.shape.length);let c=new jc(p.shape,a[0],"max"),l=[{type:"float32",data:[Number.NEGATIVE_INFINITY]}],m=e.runWebGPUProgram(c,[p],"int32",l);return u.forEach(d=>e.disposeData(d.dataId)),m}var jz={kernelName:Ys,backendName:"webgpu",kernelFunc:Qie};function Zie(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=w.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=xr({inputs:{x:n},backend:e,attrs:{perm:i}}),u.push(p),a=w.getInnerMostAxes(a.length,p.shape.length)),w.assertAxesAreInnerMostDims("argMin",[a[0]],p.shape.length);let c=new jc(p.shape,a[0],"min"),l=[{type:"float32",data:[Number.POSITIVE_INFINITY]}],m=e.runWebGPUProgram(c,[p],"int32",l);return u.forEach(d=>e.disposeData(d.dataId)),m}var Xz={kernelName:Qs,backendName:"webgpu",kernelFunc:Zie};var Jie=ye({opType:Z.ASIN}),Yz={kernelName:Ko,backendName:"webgpu",kernelFunc:Jie};var eue=ye({opType:Z.ASINH}),Qz={kernelName:qo,backendName:"webgpu",kernelFunc:eue};var tue=ye({opType:Z.ATAN}),Zz={kernelName:jo,backendName:"webgpu",kernelFunc:tue};var rue=et({opType:fe.ATAN2}),Jz={kernelName:Yo,backendName:"webgpu",kernelFunc:rue};var oue=ye({opType:Z.ATANH}),eV={kernelName:Xo,backendName:"webgpu",kernelFunc:oue};var nx=class{constructor(t){this.variableNames=["x"],this.uniforms="strides : vec2<i32>,",this.workgroupSize=[256,1,1],this.size=!0,this.outputShape=t.outShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="poolWithFilterSizeEqualsOne"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d = coords[3];
let xRCCorner = coords.yz * uniforms.strides;
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
let value = getX(batch, xRCorner, xCCorner, d);
setOutputAtIndex(index, value);
}
}
`}};var Ba=class{constructor(t,e,o=!1,n=!1,s=!1){if(this.variableNames=["x"],this.uniforms="strides : vec2<i32>, pads : vec2<i32>, dilations : vec2<i32>, convDims : vec2<i32>, filterDims : vec2<i32>,",this.workgroupSize=[128,1,1],this.size=!0,e==="avg"&&o)throw new Error("Cannot compute positions for average pool.");this.outputShape=t.outShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.poolType=e,this.computePositions=o,this.flattenPositions=n,this.includeBatchIndex=s,this.shaderKey=`pool2D_${e}_${o}_${n}_${s}`}getUserCode(){let t;this.poolType==="avg"?t="resultValue = resultValue + value; count = count + 1.0;":this.computePositions?t=`let currMaxValue = mix(value, maxValue, maxValueFound);
2023-05-08 15:12:41 +02:00
if (value >= currMaxValue) {
maxValue = value;
maxValueFound = 1.0;
maxPosition = ${this.flattenPositions?this.includeBatchIndex?"((batch * uniforms.xShape[1] + xR) * uniforms.xShape[2] + xC) * uniforms.xShape[3] + d":"(xR * uniforms.xShape[2] + xC) * uniforms.xShape[3] + d":"wR * uniforms.filterDims.y + wC"};
2023-08-05 15:03:11 +02:00
}`:t="resultValue = max(value, resultValue);";let e="resultValue";return this.poolType==="avg"&&(e="resultValue / max(count, 1.0)"),`
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
2023-05-08 15:12:41 +02:00
let d = coords[3];
let xRCCorner = vec2<i32>(coords.yz) * uniforms.strides - uniforms.pads;
2022-11-18 17:13:29 +01:00
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
2023-05-08 15:12:41 +02:00
${this.computePositions?`var maxValue = 0.0;
var maxValueFound = 0.0;
var maxPosition = 0;`:`var resultValue = ${this.poolType==="avg"?"0.0":"-1.0 / pow(10.0, -20.0)"};`}
2022-11-18 17:13:29 +01:00
2023-05-08 15:12:41 +02:00
var count = 0.0;
for (var wR = 0; wR < uniforms.filterDims.x; wR = wR + uniforms.dilations.x) {
2022-11-18 17:13:29 +01:00
let xR = xRCorner + wR;
if (xR < 0 || xR >= uniforms.convDims.x) {
continue;
}
2023-05-08 15:12:41 +02:00
for (var wC = 0; wC < uniforms.filterDims.y; wC = wC + uniforms.dilations.y) {
2022-11-18 17:13:29 +01:00
let xC = xCCorner + wC;
if (xC < 0 || xC >= uniforms.convDims.y) {
continue;
}
2023-05-08 15:12:41 +02:00
let value = getX(batch, xR, xC, d);
2023-08-05 15:03:11 +02:00
${t}
2022-11-18 17:13:29 +01:00
}
}
2023-08-05 15:03:11 +02:00
${this.computePositions?"setOutputAtIndexI32(index, maxPosition);":`setOutputAtIndex(index, ${e});`}
2022-11-18 17:13:29 +01:00
}
}
`}},wu=class{constructor(t,e,o=!1,n=!1,s=!1){if(this.variableNames=["x"],this.uniforms="strides : vec3<i32>, pads : vec3<i32>, convDims : vec3<i32>, filterDims : vec3<i32>,",this.workgroupSize=[128,1,1],this.size=!0,e==="avg"&&o)throw new Error("Cannot compute positions for average pool.");this.outputShape=t.outShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.poolType=e,this.computePositions=o,this.flattenPositions=n,this.includeBatchIndex=s,this.shaderKey=`pool3D_${e}_${o}_${n}_${s}`}getUserCode(){let t;this.poolType==="avg"?t="resultValue += value; count += 1.0;":this.computePositions?t=`let currMaxValue = mix(value, maxValue, maxValueFound);
2023-05-08 15:12:41 +02:00
if (value >= currMaxValue) {
maxValue = value;
maxValueFound = 1.0;
maxPosition = ${this.flattenPositions?this.includeBatchIndex?"(((batch * uniforms.xShape.y + xD) * uniforms.xShape.z + xR) * uniforms.xShape.w + xC) * uniforms.xShape.u + ch":"((xD * uniforms.xShape.z + xR) * uniforms.xShape.w + xC) * uniforms.xShape.u + ch":"wD * uniforms.filterDims.y * uniforms.filterDims.y + wR * uniforms.filterDims.z + wC"};
2023-08-05 15:03:11 +02:00
}`:t="resultValue = max(value, resultValue);";let e="resultValue";return this.poolType==="avg"&&(e="resultValue / max(count, 1.0)"),`
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
2023-05-08 15:12:41 +02:00
let batch = coords.x;
let ch = coords.u;
2022-11-18 17:13:29 +01:00
2023-05-08 15:12:41 +02:00
let xCorner = vec3<i32>(coords.y, coords.z, coords.w) * uniforms.strides - uniforms.pads;
let xDCorner = xCorner.x;
let xRCorner = xCorner.y;
let xCCorner = xCorner.z;
2022-11-18 17:13:29 +01:00
2023-05-08 15:12:41 +02:00
${this.computePositions?`var maxValue = 0.0;
var maxValueFound = 0.0;
var maxPosition = 0;`:`var resultValue = ${this.poolType==="avg"?"0.0":"-1.0 / pow(10.0, -20.0)"};`}
var count = 0.0;
for (var wD = 0; wD < uniforms.filterDims.x; wD++) {
let xD = xDCorner + wD;
if (xD < 0 || xD >= uniforms.convDims.x) {
continue;
}
for (var wR = 0; wR < uniforms.filterDims.y; wR++) {
let xR = xRCorner + wR;
if (xR < 0 || xR >= uniforms.convDims.y) {
continue;
}
for (var wC = 0; wC < uniforms.filterDims.z; wC++) {
let xC = xCCorner + wC;
if (xC < 0 || xC >= uniforms.convDims.z) {
continue;
}
let value = getX(batch, xD, xR, xC, ch);
2023-08-05 15:03:11 +02:00
${t}
2023-05-08 15:12:41 +02:00
}
}
}
2023-08-05 15:03:11 +02:00
${this.computePositions?"setOutputAtIndexI32(index, maxPosition);":`setOutputAtIndex(index, ${e});`}
2022-11-18 17:13:29 +01:00
}
}
`}};function e0(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{reductionIndices:s,keepDims:a}=o;return Jr(n,s,a,"max",e)}var tV={kernelName:zn,backendName:"webgpu",kernelFunc:e0};function t0(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{keepDims:s,axis:a}=o;return Jr(n,a,s,"mean",e)}var rV={kernelName:Un,backendName:"webgpu",kernelFunc:t0};function sx(r,t,e,o){if(t.filterWidth===1&&t.filterHeight===1&&y.arraysEqual(t.inShape,t.outShape))return At({inputs:{x:r},backend:o});if(t.filterWidth===t.inWidth&&t.filterHeight===t.inHeight&&t.batchSize===1&&t.padInfo.type==="VALID"){let a=r.shape.length,i=pe({inputs:{x:r},backend:o,attrs:{shape:[r.shape[a-3]*r.shape[a-2],r.shape[a-1]]}}),p;e==="avg"?p=t0({inputs:{x:i},backend:o,attrs:{axis:0,keepDims:!1}}):(y.assert(e==="max",()=>`Invalid pool type ${e}`),p=e0({inputs:{x:i},backend:o,attrs:{reductionIndices:0,keepDims:!1}}));let u=pe({inputs:{x:p},backend:o,attrs:{shape:t.outShape}});return o.disposeData(i.dataId),o.disposeData(p.dataId),u}let n,s=[{type:"int32",data:[t.strideHeight,t.strideWidth]}];return t.filterHeight===1&&t.filterWidth===1?n=new nx(t):(e==="avg"?n=new Ba(t,"avg"):(y.assert(e==="max",()=>`Invalid pool type ${e}`),n=new Ba(t,"max")),s.push({type:"int32",data:[t.padInfo.top,t.padInfo.left]},{type:"int32",data:[t.dilationHeight,t.dilationWidth]},{type:"int32",data:[t.inHeight,t.inWidth]},{type:"int32",data:[t.effectiveFilterHeight,t.effectiveFilterWidth]})),o.runWebGPUProgram(n,[r],r.dtype,s)}function nue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1,c=w.computePool2DInfo(n.shape,s,a,u,i,p);return sx(n,c,"avg",e)}var oV={kernelName:Qo,backendName:"webgpu",kernelFunc:nue};function sue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{filterSize:s,strides:a,pad:i,dataFormat:p,dimRoundingMode:u}=o,c=[1,1,1],l=w.computePool3DInfo(n.shape,s,a,c,i,u,p),m=new 
wu(l,"avg"),d=[{type:"int32",data:[l.strideDepth,l.strideHeight,l.strideWidth]},{type:"int32",data:[l.padInfo.front,l.padInfo.top,l.padInfo.left]},{type:"int32",data:[l.inDepth,l.inHeight,l.inWidth]},{type:"int32",data:[l.effectiveFilterDepth,l.effectiveFilterHeight,l.effectiveFilterWidth]}];return e.runWebGPUProgram(m,[n],n.dtype,d)}var nV={kernelName:Zs,backendName:"webgpu",kernelFunc:sue};var ax=class{constructor(t){this.variableNames=["dy"],this.uniforms=`strides : vec2<i32>, pads : vec2<i32>, dilations : vec2<i32>, filterDims : vec2<i32>,
2023-08-05 15:03:11 +02:00
outHeight : i32, outWidth : i32, avgMultiplier : f32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.inShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="avgPool2DBackprop"}getUserCode(){return`
${G("index")} {
2023-01-06 19:23:06 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d = coords[3];
let dyRCCorner = vec2<i32>(coords.yz) - uniforms.pads;
let dyRCorner = dyRCCorner.x;
let dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
2023-05-08 15:12:41 +02:00
for (var wR = 0; wR < uniforms.filterDims[0]; wR = wR + uniforms.dilations[0]) {
let dyR = f32(dyRCorner + wR) / f32(uniforms.strides[0]);
2023-01-06 19:23:06 +01:00
if (dyR < 0.0 || dyR >= f32(uniforms.outHeight) || fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
2023-05-08 15:12:41 +02:00
for (var wC = 0; wC < uniforms.filterDims[1]; wC = wC + uniforms.dilations[1]) {
let dyC = f32(dyCCorner + wC) / f32(uniforms.strides[1]);
2023-01-06 19:23:06 +01:00
if (dyC < 0.0 || dyC >= f32(uniforms.outWidth) || fract(dyC) > 0.0) {
continue;
}
let idyC = i32(dyC);
let dyValue = getDy(batch, idyR, idyC, d);
dotProd = dotProd + dyValue * uniforms.avgMultiplier;
}
}
setOutputAtIndex(index, dotProd);
}
}
`}},ix=class{constructor(t){this.variableNames=["dy"],this.uniforms=`strides : vec3<i32>, pads : vec3<i32>, filterDims : vec3<i32>,
2023-08-05 15:03:11 +02:00
outDepth : i32, outHeight : i32, outWidth : i32, avgMultiplier : f32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.inShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="avgPool3DBackprop"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords.x;
let ch = coords.u;
let dyCorner = vec3<i32>(coords.y, coords.z, coords.w) - uniforms.pads;
let dyDCorner = dyCorner.x;
let dyRCorner = dyCorner.y;
let dyCCorner = dyCorner.z;
// Convolve dy(?, ?, ?, d) with pos mask(:, :, :, ch) to get
// dx(xD, xR, xC, ch).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
for (var wD = 0; wD < uniforms.filterDims[0]; wD++) {
let dyD = f32(dyDCorner + wD) / f32(uniforms.strides[0]);
if (dyD < 0.0 || dyD >= f32(uniforms.outDepth) || fract(dyD) > 0.0) {
continue;
}
let idyD = i32(dyD);
for (var wR = 0; wR < uniforms.filterDims[1]; wR++) {
let dyR = f32(dyRCorner + wR) / f32(uniforms.strides[1]);
if (dyR < 0.0 || dyR >= f32(uniforms.outHeight) || fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims[2]; wC++) {
let dyC = f32(dyCCorner + wC) / f32(uniforms.strides[2]);
if (dyC < 0.0 || dyC >= f32(uniforms.outWidth) || fract(dyC) > 0.0) {
continue;
}
let idyC = i32(dyC);
let dyValue = getDy(batch, idyD, idyR, idyC, ch);
dotProd += dyValue * uniforms.avgMultiplier;
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}};function aue(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s}=t,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=w.computePool3DInfo(a.shape,i,p,1,u,c),m=new ix(l),d=1/(l.filterDepth*l.filterHeight*l.filterWidth),f=[{type:"int32",data:[l.strideDepth,l.strideHeight,l.strideWidth]},{type:"int32",data:[l.effectiveFilterDepth-1-l.padInfo.front,l.effectiveFilterHeight-1-l.padInfo.top,l.effectiveFilterWidth-1-l.padInfo.left]},{type:"int32",data:[l.effectiveFilterDepth,l.effectiveFilterHeight,l.effectiveFilterWidth]},{type:"int32",data:[l.outDepth]},{type:"int32",data:[l.outHeight]},{type:"int32",data:[l.outWidth]},{type:"float32",data:[d]}];return e.runWebGPUProgram(m,[n],a.dtype,f)}var sV={kernelName:Ei,backendName:"webgpu",kernelFunc:aue};function iue(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s}=t,a=s;mm([n,s],"avgPoolGrad");let{filterSize:i,strides:p,pad:u}=o,c=w.computePool2DInfo(a.shape,i,p,1,u),l=new ax(c),m=1/(c.filterHeight*c.filterWidth),d=[{type:"int32",data:[c.strideHeight,c.strideWidth]},{type:"int32",data:[c.effectiveFilterHeight-1-c.padInfo.top,c.effectiveFilterWidth-1-c.padInfo.left]},{type:"int32",data:[c.dilationHeight,c.dilationWidth]},{type:"int32",data:[c.effectiveFilterHeight,c.effectiveFilterWidth]},{type:"int32",data:[c.outHeight]},{type:"int32",data:[c.outWidth]},{type:"float32",data:[m]}];return e.runWebGPUProgram(l,[n],a.dtype,d)}var aV={kernelName:$i,backendName:"webgpu",kernelFunc:iue};function uue(r){let{inputs:t,backend:e,attrs:o}=r,{a:n,b:s}=t,{transposeA:a,transposeB:i}=o;return _p({a:n,b:s,transposeA:a,transposeB:i,backend:e})}var iV={kernelName:Zo,backendName:"webgpu",kernelFunc:uue};var ux=class{constructor(t,e){this.variableNames=["source"],this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.rank=e.length,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.start=t,this.uniforms=`start : 
${ft(t.length)}, `,this.shaderKey="slice"}getUserCode(){let t=ft(this.rank),e=pue(this.rank),o;return this.start.length===1?o=this.outputShape.map((s,a)=>"sourceLoc = uniforms.start + coords;"):o=this.outputShape.map((s,a)=>`sourceLoc.${r0[a]} = uniforms.start.${Oo(a)} + coords.${r0[a]};`),`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
2023-08-05 15:03:11 +02:00
var sourceLoc : ${t};
2022-11-18 17:13:29 +01:00
let coords = getCoordsFromIndex(index);
${o.join(`
`)}
2023-08-05 15:03:11 +02:00
setOutputAtIndex(index, getSource(${e}));
2022-11-18 17:13:29 +01:00
}
}
`}},r0=["x","y","z","w","u","v"];function pue(r){if(r===1)return"sourceLoc";if(r<=6)return r0.slice(0,r).map(t=>`sourceLoc.${t}`).join(",");throw Error(`Slicing for rank ${r} is not yet supported`)}function Hs(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{begin:s,size:a}=o,[i,p]=pt.parseSliceParams(n,s,a);if(pt.assertParamsValid(n,i,p),e.shouldExecuteOnCPU([n])||n.dtype==="string"){let l=e.tensorMap.get(n.dataId),m=Az(l.values,i,p,n.shape,n.dtype);return e.makeTensorInfo(p,n.dtype,m)}if(y.sizeFromShape(p)===0)return e.makeTensorInfo(p,n.dtype,[]);let u=new ux(i,p),c=[{type:"int32",data:i}];return e.runWebGPUProgram(u,[n],n.dtype,c)}var uV={kernelName:ha,backendName:"webgpu",kernelFunc:Hs};var cue=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{blockShape:s,crops:a}=o;y.assert(n.shape.length<=4,()=>"batchToSpaceND for rank > 4 with a WebGPU backend not implemented yet");let i=s.reduce((b,C)=>b*C),p=w.getReshaped(n.shape,s,i),u=w.getPermuted(p.length,s.length),c=w.getReshapedPermuted(n.shape,s,i),l=w.getSliceBeginCoords(a,s.length),m=w.getSliceSize(c,a,s.length),d=[],f=pe({inputs:{x:n},backend:e,attrs:{shape:p}}),h=xr({inputs:{x:f},backend:e,attrs:{perm:u}}),g=pe({inputs:{x:h},backend:e,attrs:{shape:c}}),x=Hs({inputs:{x:g},backend:e,attrs:{begin:l,size:m}});return d.push(f),d.push(h),d.push(g),d.forEach(b=>e.disposeData(b.dataId)),x},pV={kernelName:Js,backendName:"webgpu",kernelFunc:cue};var lue=`
2022-11-20 22:20:02 +01:00
fn bincount_write(index: i32, value: f32) {
${Yr("&result[index]","value","float32")}
2022-11-20 22:20:02 +01:00
}
2023-09-18 18:44:36 +02:00
`,mue=`
2022-11-20 22:20:02 +01:00
fn bincount_write(index: i32, value: f32) {
2023-01-06 19:23:06 +01:00
atomicStore(&result[index], bitcast<i32>(value));
2022-11-20 22:20:02 +01:00
}
`,Xc=class{constructor(t,e,o=!1){this.outputShape=[],this.variableNames=["x"],this.uniforms="binCountSize : i32,",this.workgroupSize=[64,1,1],this.atomic=!0,this.hasWeights=!0,this.binaryOutput=!1,this.outputShape=t,this.rank=t.length,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.binaryOutput=o,o&&(this.atomic=!1),this.hasWeights=e,this.hasWeights&&this.variableNames.push("w"),this.shaderKey=`bincount_${this.hasWeights}_${this.binaryOutput}_${this.rank}`}getUserCode(){return`
2023-09-18 18:44:36 +02:00
${this.binaryOutput?mue:lue}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-20 22:20:02 +01:00
${this.rank===1?`if (index < uniforms.xShape) {
let indexVal = i32(getX(index));
if (indexVal < uniforms.binCountSize) {
2023-01-06 19:23:06 +01:00
let value = ${this.binaryOutput?1:this.hasWeights?"getW(index)":"1."};
2022-11-20 22:20:02 +01:00
bincount_write(indexVal, value);
}
}`:`let coord = getCoordsFromIndex(index);
if (coordsInBounds2D(coord, uniforms.xShape)) {
let indexVal = i32(getX(coord[0], coord[1]));
if (indexVal < uniforms.binCountSize) {
2023-01-06 19:23:06 +01:00
let value = ${this.binaryOutput?1:this.hasWeights?"getW(coord[0], coord[1])":"1."};
2022-11-20 22:20:02 +01:00
bincount_write(coord.x * uniforms.binCountSize + indexVal, value);
}
}`}
}
`}};function due(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,weights:s}=t,{size:a}=o,i=y.sizeFromShape(n.shape),u=y.sizeFromShape(s.shape)>0,c=[a],l=s.dtype,m=vt({backend:e,attrs:{shape:c,value:0,dtype:l}}),d=new Xc([i],u),f=[{type:"int32",data:[a]}],h=u?[n,s]:[n];return e.runWebGPUProgram(d,h,l,f,m)}var cV={kernelName:Jo,backendName:"webgpu",kernelFunc:due};var px=class{constructor(t){this.outputShape=[],this.variableNames=["s0","s1"],this.uniforms="s0Size : i32, s1Size : i32, ",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[t],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="broadcastArgs"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
var s0 = 1.0;
var s1 = 1.0;
let indexS0 = index - uniforms.size + uniforms.s0Size;
let indexS1 = index - uniforms.size + uniforms.s1Size;
if (indexS0 >= 0) {
s0 = getS0(indexS0);
}
if (indexS1 >= 0) {
s1 = getS1(indexS1);
}
if (s0 == 1.0) {
setOutputAtIndex(index, s1);
} else if (s1 == 1.0) {
setOutputAtIndex(index, s0);
} else if (s0 != s1) {
setOutputAtIndex(index, uniforms.NAN);
} else {
setOutputAtIndex(index, s0);
}
}
}
`}};function fue(r){let{inputs:t,backend:e}=r,{s0:o,s1:n}=t;if(e.shouldExecuteOnCPU([o,n])){let c=e.tensorMap.get(o.dataId),l=e.tensorMap.get(n.dataId),m=c.values,d=l.values,f=w.assertAndGetBroadcastShape(Array.from(m),Array.from(d));return e.makeTensorInfo([f.length],"int32",Int32Array.from(f))}let s=y.sizeFromShape(o.shape),a=y.sizeFromShape(n.shape),i=Math.max(s,a),p=new px(i),u=[{type:"int32",data:[s]},{type:"int32",data:[a]}];return e.runWebGPUProgram(p,[o,n],"int32",u)}var lV={kernelName:ea,backendName:"webgpu",kernelFunc:fue};var o0=et({opType:fe.NOT_EQUAL,dtype:"bool",cpuKernelImpl:Tz}),mV={kernelName:Yn,backendName:"webgpu",kernelFunc:o0};function Ii(r){let{inputs:t,backend:e}=r,{input:o}=t,n=e.tensorMap.get(o.dataId);return At({inputs:{x:n.complexTensorInfos.real},backend:e})}var dV={kernelName:Gi,backendName:"webgpu",kernelFunc:Ii};function fV(r,t){let e=new Zr(r.shape,Z.TO_INT),o=t.runWebGPUProgram(e,[r],"int32");return{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}function n0(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{dtype:s}=o;if(s==="complex64"){if(n.dtype==="complex64")return At({inputs:{x:n},backend:e});let a=Ur(n.shape),i=n0({inputs:{x:n},backend:e,attrs:{dtype:"float32"}}),p=xo({inputs:{real:i,imag:a},backend:e});return a.dispose(),e.disposeData(i.dataId),p}if(n.dtype==="complex64"){let a=Ii({inputs:{input:n},backend:e}),i=n0({inputs:{x:a},backend:e,attrs:{dtype:s}});return e.disposeData(a.dataId),i}if(!y.hasEncodingLoss(n.dtype,s)){let a=At({inputs:{x:n},backend:e});return{dataId:a.dataId,shape:a.shape,dtype:s}}if(e.shouldExecuteOnCPU([n])){let a=e.tensorMap.get(n.dataId).values,[i,p,u]=iz(a,n.shape,n.dtype,s);return e.makeTensorInfo(i,p,u)}if(s==="int32")return fV(n,e);if(s==="bool"){let a=e.makeTensorInfo([],"bool",y.getTypedArrayFromDType("bool",1)),p=o0({inputs:{a:n,b:a},backend:e});return e.disposeData(a.dataId),p}throw new Error(`Error in Cast: failed to cast ${n.dtype} to ${s}`)}var 
hV={kernelName:yo,backendName:"webgpu",kernelFunc:n0};var hue=ye({opType:Z.CEIL,cpuKernelImpl:uz}),gV={kernelName:en,backendName:"webgpu",kernelFunc:hue};var cx=class{constructor(t){this.variableNames=["A"],this.uniforms="minVal : f32, maxVal : f32,",this.workPerThread=4,this.workgroupSize=[64,1,1],this.outputComponent=4,this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.shaderKey="clipVec4"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if(index < uniforms.size) {
let value = getAByOutputIndex(index);
2023-01-06 19:23:06 +01:00
var clampedValue = clamp(
value, vec4<f32>(uniforms.minVal), vec4<f32>(uniforms.maxVal));
clampedValue = select(clampedValue, value, isnanVec4(value));
2022-11-18 17:13:29 +01:00
setOutputAtIndex(index, clampedValue);
}
}
`}};var lx=class{constructor(t){this.variableNames=["A"],this.uniforms="minVal : f32, maxVal : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="clip"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if(index < uniforms.size) {
let value = getAByOutputIndex(index);
if (isnan(value)) {
setOutputAtIndex(index, value);
return;
}
setOutputAtIndex(index, clamp(value, uniforms.minVal, uniforms.maxVal));
}
}
`}};function gue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{clipValueMin:s,clipValueMax:a}=o,i,p=[{type:"float32",data:[s]},{type:"float32",data:[a]}];return y.sizeFromShape(n.shape)%4===0?i=new cx(n.shape):i=new lx(n.shape),e.runWebGPUProgram(i,[n],n.dtype,p)}var xV={kernelName:bo,backendName:"webgpu",kernelFunc:gue};var mx=class{constructor(t){this.outputShape=[],this.variableNames=["real","imag"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="complexAbs"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let re = abs(getRealByOutputIndex(index));
let im = abs(getImagByOutputIndex(index));
let mx = max(re, im);
// The length function in wgsl may be not underflow-safe on some GPUs.
// So the safe solution is to ensure underflow-safety in all cases.
setOutputAtIndex(index, select(mx * length(vec2<f32>(1, min(re, im)/mx)), 0.0, mx == 0.0));
}
}
`}};function yV(r,t){return{dataId:t.dataId,dtype:t.dtype,shape:r.shape}}function xue(r){let{inputs:t,backend:e}=r,{x:o}=t,n=e.tensorMap.get(o.dataId),s=new mx(o.shape),a=[yV(o,n.complexTensorInfos.real),yV(o,n.complexTensorInfos.imag)];return e.runWebGPUProgram(s,a,a[0].dtype)}var bV={kernelName:Di,backendName:"webgpu",kernelFunc:xue};var dx=class{constructor(t){this.uniforms="",this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=w.computeOutShape(t,1),this.variableNames=t.map((e,o)=>`T${o}`),this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.offsetLength=t.length-1;for(let e=0;e<this.offsetLength;e++)this.uniforms+=`offset${e} : i32,`;this.shaderKey="concat"}getUserCode(){let t=[];if(this.offsetLength>0){t.push("if (yC < uniforms.offset0){ setOutputAtCoords(coords.x, coords.y, getT0(yR, yC)); }");for(let s=1;s<this.offsetLength;s++)t.push(`else if (yC < uniforms.offset${[s]}){ setOutputAtCoords(coords.x, coords.y, getT${s}(yR, yC - uniforms.offset${s-1})); }`);let o=this.offsetLength,n=this.offsetLength-1;t.push(`else { setOutputAtCoords(coords.x, coords.y, getT${o}(yR, yC - uniforms.offset${n})); }`)}else t.push("setOutputAtCoords(coords.x, coords.y, getT0(yR, yC));");return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
for(var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if(flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
let yR = coords.x;
let yC = coords.y;
2023-08-05 15:03:11 +02:00
${t.join(`
2022-11-18 17:13:29 +01:00
`)}
}
}
}
`}};function $p(r){let{inputs:t,backend:e}=r,{input:o}=t,n=e.tensorMap.get(o.dataId);return At({inputs:{x:n.complexTensorInfos.imag},backend:e})}var CV={kernelName:Vi,backendName:"webgpu",kernelFunc:$p};function Yc(r,t,e){let o=r[0].dtype;if(o==="complex64"){let f=r.map(C=>Ii({inputs:{input:C},backend:e})),h=r.map(C=>$p({inputs:{input:C},backend:e})),g=Yc(f,t,e),x=Yc(h,t,e),b=xo({inputs:{real:g,imag:x},backend:e});return f.forEach(C=>e.disposeData(C.dataId)),h.forEach(C=>e.disposeData(C.dataId)),e.disposeData(g.dataId),e.disposeData(x.dataId),b}let n=e.shouldExecuteOnCPU(r);if(o==="string"&&(n=!0),n){let f=r.map(k=>{let E=[-1,y.sizeFromShape(k.shape.slice(t))];return pe({inputs:{x:k},backend:e,attrs:{shape:E}})}),h=f.map(k=>({vals:e.readSync(k.dataId),shape:k.shape})),g=w.computeOutShape(f.map(k=>k.shape),1),x=f[0].shape[0]===1,b=pz(h,g,o,x),C=w.computeOutShape(r.map(k=>k.shape),t),S=e.makeTensorInfo(C,o,b);return f.forEach(k=>e.disposeData(k.dataId)),S}let s=e.device.limits.maxStorageBuffersPerShaderStage-1;if(r.length>s){let f=[];for(let g=0;g<r.length;g+=s){let x=r.slice(g,g+s);f.push(Yc(x,t,e))}let h=Yc(f,t,e);for(let g of f)e.disposeData(g.dataId);return h}let{tensors2D:a,outShape:i}=yue(r,t,e),p=a.map(f=>f.shape),u=new dx(p),c=[],l=new Array(p.length-1);if(l.length>0){l[0]=p[0][1],c.push({type:"int32",data:[l[0]]});for(let f=1;f<l.length;f++)l[f]=l[f-1]+p[f][1],c.push({type:"int32",data:[l[f]]})}let m=e.runWebGPUProgram(u,a,a[0].dtype,c);a.forEach(f=>e.disposeData(f.dataId));let d=pe({inputs:{x:m},backend:e,attrs:{shape:i}});return e.disposeData(m.dataId),d}function yue(r,t,e){let o=w.computeOutShape(r.map(s=>s.shape),t);return{tensors2D:r.map(s=>pe({inputs:{x:s},backend:e,attrs:{shape:[y.sizeFromShape(s.shape.slice(0,t)),y.sizeFromShape(s.shape.slice(t))]}})),outShape:o}}function s0(r){let{inputs:t,backend:e,attrs:o}=r,{axis:n}=o,s=y.parseAxisParam(n,t[0].shape)[0],a=t.map(u=>u.shape);w.assertParamsConsistent(a,s);let 
i=w.computeOutShape(t.map(u=>u.shape),s);if(y.sizeFromShape(i)===0)return e.makeTensorInfo(i,t[0].dtype,[]);let p=t.filter(u=>y.sizeFromShape(u.shape)>0);return p.length===1?At({inputs:{x:p[0]},backend:e}):Yc(p,s,e)}var wV={kernelName:ta,backendName:"webgpu",kernelFunc:s0};function bue(r,t,e,o,n=!1,s=null,a=!1,i=4,p=4,u=4){let c=D=>{switch(D){case 1:return"resData = f32(x[xIndex]);";case 3:return"resData = vec3<f32>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);";case 4:return"resData = vec4<f32>(x[xIndex / 4]);";default:throw new Error(`innerElementSize ${D} is not supported.`)}},l=D=>{switch(D){case 1:return"return f32(W[row * uniforms.wShape[3] + col]);";case 4:return"return vec4<f32>(W[(row * uniforms.wShape[3] + col) / 4]);";default:throw new Error(`innerElementSize ${D} is not supported.`)}},m=r?`
2022-11-18 17:13:29 +01:00
let coord = vec4<i32>(batch, xRow, xCol, xCh);
`:`
let coord = vec4<i32>(batch, xCh, xRow, xCol);
2022-11-20 22:20:02 +01:00
`,d=r?`
2022-11-18 17:13:29 +01:00
let coords = vec4<i32>(
batch,
row / outWidth,
row % outWidth,
col);
`:`
let coords = vec4<i32>(
batch,
row,
col / outWidth,
col % outWidth);
2022-11-20 22:20:02 +01:00
`,f=r?"uniforms.xShape[1]":"uniforms.xShape[2]",h=r?"uniforms.xShape[2]":"uniforms.xShape[3]",g=r?"row":"col",x=r?"col":"row",b=`
2022-11-18 17:13:29 +01:00
let inChannels = uniforms.wShape[2];
let outWidth = ${r?"uniforms.outShape[2]":"uniforms.outShape[3]"};
let outRow = ${g} / outWidth;
let outCol = ${g} % outWidth;
2022-11-20 22:20:02 +01:00
let WRow = ${x} / (uniforms.filterDims[1] * inChannels);
let WCol = ${x} / inChannels % uniforms.filterDims[1];
2023-05-08 15:12:41 +02:00
let xRow = outRow * uniforms.strides[0] + uniforms.dilations[0] * WRow - uniforms.pads[0];
let xCol = outCol * uniforms.strides[1] + uniforms.dilations[1] * WCol - uniforms.pads[1];
2022-11-20 22:20:02 +01:00
let xCh = ${x} % inChannels;
2023-05-08 15:12:41 +02:00
var resData = ${Ae(i)}(0.0);
2022-11-18 17:13:29 +01:00
// The bounds checking is always needed since we use it to pad zero for
// the 'same' padding type.
2022-11-20 22:20:02 +01:00
if (xRow >= 0 && xRow < ${f} && xCol >= 0 && xCol < ${h}) {
2022-11-18 17:13:29 +01:00
${m}
let xIndex = getIndexFromCoords4D(coord, uniforms.xShape);
${c(i)}
}
2023-08-05 15:03:11 +02:00
return resData;`,C=r?t&&o?`
2022-11-18 17:13:29 +01:00
${b}`:`
if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
${b}
}
2023-08-05 15:03:11 +02:00
return ${Ae(i)}(0.0);`:o&&e?`
2022-11-18 17:13:29 +01:00
${b}`:`
if (row < uniforms.dimInner && col < uniforms.dimBOuter) {
${b}
}
2023-05-08 15:12:41 +02:00
return ${Ae(i)}(0.0);`,S=`${l(p)}`,k=Ae(u),_=r?Ae(i):Ae(p),E=r?Ae(p):Ae(i);return`
${dr(s,a,u===4,4)}
2023-08-05 15:03:11 +02:00
fn mm_readA(batch: i32, row : i32, col : i32) -> ${_} {
${r?C:S}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
fn mm_readB(batch: i32, row : i32, col : i32) -> ${E} {
${r?S:C}
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
fn mm_write(batch: i32, row : i32, col : i32, valueIn : ${k}) {
2022-11-18 17:13:29 +01:00
if (row < uniforms.dimAOuter && col < uniforms.dimBOuter)
{
var value = valueIn;
let outWidth = ${r?"uniforms.outShape[2]":"uniforms.outShape[3]"};
2022-11-20 22:20:02 +01:00
${d}
${Qr(n,s)}
2022-11-18 17:13:29 +01:00
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}`}var fx=class{constructor(t,e,o,n,s=!1,a=null,i=!1,p=!1){this.variableNames=["x","W"],this.uniforms="filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, dilations : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=t.outShape,this.isChannelsLast=t.dataFormat==="channelsLast",this.isVec4=((t.inChannels%4===0||t.inChannels%3===0)&&this.isChannelsLast||t.outWidth%4===0&&!this.isChannelsLast)&&t.outChannels%4===0,this.dispatchLayout=this.isChannelsLast?{x:[3],y:[1,2],z:[0]}:{x:[2,3],y:[1],z:[0]},this.workgroupSize=pm(this.dispatchLayout,this.outputShape,this.isVec4),this.elementsPerThread=cm(this.dispatchLayout,this.outputShape,this.isVec4),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,this.elementsPerThread),this.isVec4?(this.outputComponent=4,this.isChannelsLast&&t.inChannels%4!==0?(this.innerElementSize=3,this.variableComponents=[1,4]):(this.innerElementSize=4,this.variableComponents=[4,4]),s&&(this.variableNames.push("bias"),this.variableComponents.push(4)),i&&(this.variableNames.push("preluActivationWeights"),this.variableComponents.push(4))):(this.innerElementSize=this.elementsPerThread[0],s&&this.variableNames.push("bias"),i&&this.variableNames.push("preluActivationWeights")),this.sequentialAccessByThreads=p,this.addBias=s,this.activation=a,this.hasPreluActivationWeights=i,this.tileAOuter=this.workgroupSize[1]*this.elementsPerThread[1],this.tileBOuter=this.workgroupSize[0]*this.elementsPerThread[0],this.tileInner=Math.max(this.workgroupSize[0]*this.innerElementSize,this.workgroupSize[1]),this.fitAOuter=e%this.tileAOuter===0,this.fitBOuter=o%this.tileBOuter===0,this.fitInner=n%this.tileInner===0,this.shaderKey=`conv2DMM_${this.elementsPerThread}_${this.activation}}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.innerElementSize}_${this.isChannelsLast}_${this.sequentialAccessByThreads}`}getUserCode(){let 
t=this.isVec4?Np(this.elementsPerThread,this.workgroupSize,!this.isChannelsLast,this.tileInner):Tp(this.elementsPerThread,this.workgroupSize,!this.isChannelsLast,this.tileInner,!1,null,this.sequentialAccessByThreads),e=this.isVec4?[this.innerElementSize,4,4]:[1,1,1];return`
2023-09-18 18:44:36 +02:00
${bue(this.isChannelsLast,this.fitAOuter,this.fitBOuter,this.fitInner,this.addBias,this.activation,this.hasPreluActivationWeights,e[0],e[1],e[2])}
2023-08-05 15:03:11 +02:00
${t}
`}};var hx=class{constructor(t,e=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms="filterDims: vec2<i32>, pads: vec2<i32>, strides: vec2<i32>, dilations: vec2<i32>,",this.workgroupSize=[4,4,8],this.outputShape=t.outShape,this.isChannelsLast=t.dataFormat==="channelsLast",this.dispatchLayout=this.isChannelsLast?{x:[2],y:[1],z:[0,3]}:{x:[3],y:[2],z:[0,1]},this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.addBias=e,this.activation=o,this.hasPreluActivationWeights=n,e&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.shaderKey=`conv2dnaive_${this.activation}_${this.isChannelsLast}`}getUserCode(){return`
${dr(this.activation,this.hasPreluActivationWeights,!1,4)}
2022-11-18 17:13:29 +01:00
fn readInp(batch : i32, row : i32, col : i32, chan : i32) -> f32{
let coords = vec4<i32>(batch, row, col, chan);
if (coordsInBounds4D(coords, uniforms.xShape)) {
return getX(batch, row, col, chan);
} else {
return 0.0;
}
}
fn readFilt(row : i32, col : i32, xChannel : i32, outChannel : i32) -> f32{
let coords = vec4<i32>(row, col, xChannel, outChannel);
if(coordsInBounds4D(coords, uniforms.wShape)) {
return getW(row, col, xChannel, outChannel);
} else {
return 0.0;
}
}
fn writeResult(batch : i32, row : i32, col : i32, chan : i32, valueIn : f32) {
let coords = ${this.isChannelsLast?"vec4<i32>(batch, row, col, chan);":"vec4<i32>(batch, chan, row, col);"}
if (coordsInBounds4D(coords, uniforms.outShape)) {
var value = valueIn;
${Qr(this.addBias,this.activation)}
2022-11-18 17:13:29 +01:00
setOutputAtCoords(coords.x, coords.y, coords.z, coords.w, value);
}
}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
let coords = getOutputCoords();
let batch = coords[0];
let outChannel = ${this.isChannelsLast?"coords[3];":"coords[1];"}
let outRow = ${this.isChannelsLast?"coords[1];":"coords[2];"}
let outCol = ${this.isChannelsLast?"coords[2];":"coords[3];"}
var acc : f32 = 0.0;
for (var row = 0; row < uniforms.filterDims[0]; row = row + 1) {
for (var col = 0; col < uniforms.filterDims[1]; col = col + 1) {
2023-05-08 15:12:41 +02:00
let xRow = outRow * uniforms.strides[0] + uniforms.dilations[0] * row - uniforms.pads[0];
let xCol = outCol * uniforms.strides[1] + uniforms.dilations[1] * col - uniforms.pads[1];
2022-11-18 17:13:29 +01:00
for (var xChannel = 0; xChannel < ${this.isChannelsLast?"uniforms.xShape[3];":"uniforms.xShape[1];"} xChannel = xChannel + 1) {
${this.isChannelsLast?"let v = readInp(batch, xRow, xCol, xChannel);":"let v = readInp(batch, xChannel, xRow, xCol);"}
let f = readFilt(row, col, xChannel, outChannel);
acc = acc + v * f;
}
}
}
writeResult(batch, outRow, outCol, outChannel, acc);
}
`}};var gx=class{constructor(t,e){this.variableNames=["x"],this.uniforms=`pads : vec2<i32>, strides : vec2<i32>, dilations : vec2<i32>, outWidth : i32, itemsPerBlockRow : i32,
2023-08-05 15:03:11 +02:00
inChannels : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.isChannelsLast=e,this.shaderKey=`im2col_${this.isChannelsLast}`}getUserCode(){let t=this.isChannelsLast?1:2,e=this.isChannelsLast?2:3,o=this.isChannelsLast?"coords[1]":"coords[2]",n=this.isChannelsLast?"coords[2]":"coords[1]",s=this.isChannelsLast?"getX(batch, xRow, xCol, ch)":"getX(batch, ch, xRow, xCol)";return`
${G("index")} {
2022-11-20 22:20:02 +01:00
let coords = getCoordsFromIndex(index);
if(index < uniforms.size) {
let batch = coords[0];
let row = ${o};
let col = ${n};
2023-05-08 15:12:41 +02:00
let offsetY = (row / uniforms.outWidth) * uniforms.strides[0] - uniforms.pads[0];
let xRow = offsetY + uniforms.dilations[0] * (col / uniforms.itemsPerBlockRow);
2022-11-20 22:20:02 +01:00
var value = 0.0;
2023-08-05 15:03:11 +02:00
if(xRow < uniforms.xShape[${t}] && xRow >= 0) {
2023-05-08 15:12:41 +02:00
let offsetX = (row % uniforms.outWidth) * uniforms.strides[1] -
uniforms.pads[1];
let xCol = offsetX + uniforms.dilations[1] * ((col %
2022-11-20 22:20:02 +01:00
uniforms.itemsPerBlockRow) / uniforms.inChannels);
let ch = col % uniforms.inChannels;
2023-08-05 15:03:11 +02:00
if(xCol < uniforms.xShape[${e}] && xCol >= 0) {
2022-11-20 22:20:02 +01:00
value = ${s};
}
}
setOutputAtIndex(index, value);
}
}
`}};/* xx(r, t): computes a 3-D-compatible shape for a bias / prelu-weights tensor, or null when no reshape is needed. Rank >= 3: the last three dims are collapsed to two - [d(-3)*d(-2), d(-1)] when t (channels-last) is true, [d(-3), d(-2)*d(-1)] otherwise. Rank 1 with more than one element and t false: [n, 1]. Everything else: null. */function xx(r,t){let e=r.length;return e>=3?t?[...r.slice(0,-3),r[e-3]*r[e-2],r[e-1]]:[...r.slice(0,-3),r[e-3],r[e-2]*r[e-1]]:!t&&e===1&&r[0]>1?[r[0],1]:null}/* Cue: conv2d computed as a single matmul via _p. When the layout is channels-last, the filter covers the whole input and padding is "VALID", x becomes [1, batch, H*W*C] and the filter [1, H*W*C, outChannels]; otherwise x is flattened over its spatial dims and the filter becomes [1, inChannels, outChannels]. Bias and prelu weights are reshaped with xx when it returns a shape. The matmul result is reshaped to convInfo.outShape and every intermediate collected in m is disposed. */function Cue({x:r,filter:t,convInfo:e,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=e.dataFormat==="channelsLast",u=!p,c=!1,l=p&&e.filterHeight===e.inHeight&&e.filterWidth===e.inWidth&&e.padInfo.type==="VALID",m=[],d,f;if(l){let x=e.inHeight*e.inWidth*e.inChannels;d=pe({inputs:{x:r},backend:o,attrs:{shape:[1,e.batchSize,x]}}),f=pe({inputs:{x:t},backend:o,attrs:{shape:[1,x,e.outChannels]}})}else d=pe({inputs:{x:r},backend:o,attrs:{shape:p?[e.batchSize,e.inHeight*e.inWidth,e.inChannels]:[e.batchSize,e.inChannels,e.inHeight*e.inWidth]}}),f=pe({inputs:{x:t},backend:o,attrs:{shape:[1,e.inChannels,e.outChannels]}});if(m.push(d),m.push(f),s!=null){let x=xx(s.shape,p);x!=null&&(s=pe({inputs:{x:s},backend:o,attrs:{shape:x}}),m.push(s))}if(n!=null){let x=xx(n.shape,p);x!=null&&(n=pe({inputs:{x:n},backend:o,attrs:{shape:x}}),m.push(n))}let h=_p({a:p?d:f,b:p?f:d,transposeA:u,transposeB:c,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a}),g=pe({inputs:{x:h},backend:o,attrs:{shape:e.outShape}});m.push(h);for(let x of m)o.disposeData(x.dataId);return g}/* wue: conv2d computed as im2col followed by a matmul. The gx program builds the patch matrix ([batch, outH*outW, fW*fH*inC] for channels-last, transposed otherwise) with uniforms pads, strides, dilations, outWidth, inChannels*filterWidth and inChannels; the filter is reshaped to [1, fW*fH*inC, -1]; _p multiplies them with bias / activation fused; the result is reshaped to convInfo.outShape and every intermediate collected in P is disposed. */function wue({x:r,filter:t,convInfo:e,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let{filterWidth:p,filterHeight:u,inChannels:c,strideWidth:l,strideHeight:m,padInfo:d,outWidth:f,outHeight:h,dilationWidth:g,dilationHeight:x,dataFormat:b}=e,C=b==="channelsLast",S=p*u*c,k=h*f,_=C?[e.batchSize,k,S]:[e.batchSize,S,k],E=new gx(_,C),R=[{type:"int32",data:[d.top,d.left]},{type:"int32",data:[m,l]},{type:"int32",data:[x,g]},{type:"int32",data:[f]},{type:"int32",data:[c*p]},{type:"int32",data:[c]}],D=o.runWebGPUProgram(E,[r],r.dtype,R),P=[];P.push(D);let O=pe({inputs:{x:t},backend:o,attrs:{shape:[1,S,-1]}});if(P.push(O),s!=null){let 
U=xx(s.shape,C);U!=null&&(s=pe({inputs:{x:s},backend:o,attrs:{shape:U}}),P.push(s))}if(n!=null){let U=xx(n.shape,C);U!=null&&(n=pe({inputs:{x:n},backend:o,attrs:{shape:U}}),P.push(n))}let B=_p({a:C?D:O,b:C?O:D,transposeA:!C,transposeB:!1,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a}),z=pe({inputs:{x:B},backend:o,attrs:{shape:e.outShape}});P.push(B);for(let U of P)o.disposeData(U.dataId);return z}function yx({x:r,filter:t,convInfo:e,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=n!=null,u=s!=null,c=e.dataFormat==="channelsLast",l=c&&e.filterHeight===e.inHeight&&e.filterWidth===e.inWidth&&e.padInfo.type==="VALID",m=A().getBool("WEBGPU_USE_NAIVE_CONV2D_DEBUG");if(!m&&(l||e.filterHeight===1&&e.filterWidth===1&&e.dilationHeight===1&&e.dilationWidth===1&&e.strideHeight===1&&e.strideWidth===1&&(e.padInfo.type==="SAME"||e.padInfo.type==="VALID")))return Cue({x:r,filter:t,convInfo:e,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a});let d=A().getNumber("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL"),f=d>-1?d:o.thresholdToIncreaseWorkgroups,h=e.batchSize*Math.ceil(e.outHeight*e.outWidth/32)*Math.ceil(e.outChannels/32);if(A().getBool("WEBGPU_CONV_SEPARATE_IM2COL_SHADER")||h<=f)return wue({x:r,filter:t,convInfo:e,backend:o,bias:n,preluActivationWeights:s,leakyreluAlpha:a,activation:i});let g,x=[e.padInfo.top,e.padInfo.left],b=[{type:"int32",data:[e.filterHeight,e.filterWidth]},{type:"int32",data:[...x]},{type:"int32",data:[e.strideHeight,e.strideWidth]},{type:"int32",data:[e.dilationHeight,e.dilationWidth]}];if(m)g=new hx(e,p,i,u);else{let _=c?e.outHeight*e.outWidth:e.outChannels,E=c?e.outChannels:e.outHeight*e.outWidth,R=e.filterHeight*e.filterWidth*e.inChannels;b.push({type:"int32",data:[_]},{type:"int32",data:[E]},{type:"int32",data:[R]});let D=o.adapterInfo.isIntel();g=new fx(e,_,E,R,p,i,u,D)}let 
C=[],S=[r,t];p&&(!c&&n.shape.length===1&&(n=pe({inputs:{x:n},backend:o,attrs:{shape:[n.shape[0],1,1]}}),C.push(n)),S.push(n)),u&&(!c&&s.shape.length===1&&(s=pe({inputs:{x:s},backend:o,attrs:{shape:[s
2023-08-05 15:03:11 +02:00
${G()} {
2023-05-08 15:12:41 +02:00
let batch = i32(globalId.z) / uniforms.outShape[1];
let r = i32(globalId.z) % uniforms.outShape[1];
let c = i32(globalId.y) * ${this.workPerThread};
let d1 = i32(globalId.x) * 4;
let dyCorner = vec2<i32>(r, c) - uniforms.pads;
// Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
// ? = to be determined. : = across all values in that axis.
var dotProd: array<vec4<f32>, ${this.workPerThread}>;
for (var i = 0; i < ${this.workPerThread}; i++) {
dotProd[i] = vec4<f32>(0.0);
}
for (var wR = 0; wR < uniforms.filterDims.x; wR = wR + 1) {
let dyR = f32(dyCorner.x + wR) / f32(uniforms.strides.x);
let wRPerm = uniforms.filterDims.x - 1 - wR;
if (dyR < 0.0 || dyR >= f32(uniforms.outBackprop[1]) ||
fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims.y; wC = wC + 1) {
let dyC = f32(dyCorner.y + wC) / f32(uniforms.strides.y);
let dyC2 = f32(dyCorner.y + 1 + wC) / f32(uniforms.strides.y);
let wCPerm = uniforms.filterDims.y - 1 - wC;
var bDyCVal = true;
var bDyCVal2 = true;
if (dyC < 0.0 || dyC >= f32(uniforms.outBackprop[2]) ||
fract(dyC) > 0.0) {
bDyCVal = false;
}
if (dyC2 < 0.0 || dyC2 >= f32(uniforms.outBackprop[2]) ||
fract(dyC2) > 0.0) {
bDyCVal2 = false;
}
let idyC = i32(dyC);
let idyC2 = i32(dyC2);
if (bDyCVal && bDyCVal2) {
let d2Length = uniforms.outBackprop[3];
for (var d2 = 0; d2 < d2Length; d2 = d2 + 4) {
let wValue0 = getW(wRPerm, wCPerm, d1, d2);
let wValue1 = getW(wRPerm, wCPerm, d1 + 1, d2);
let wValue2 = getW(wRPerm, wCPerm, d1 + 2, d2);
let wValue3 = getW(wRPerm, wCPerm, d1 + 3, d2);
var xValue = getDy(batch, idyR, idyC, d2);
let tmpval = vec4<f32>(dot(xValue, wValue0),
dot(xValue, wValue1),
dot(xValue, wValue2),
dot(xValue, wValue3));
dotProd[0] = dotProd[0] + tmpval;
xValue = getDy(batch, idyR, idyC2, d2);
dotProd[1] = dotProd[1] + vec4<f32>(dot(xValue, wValue0),
dot(xValue, wValue1),
dot(xValue, wValue2),
dot(xValue, wValue3));
}
} else if (bDyCVal) {
let d2Length = uniforms.outBackprop[3];
for (var d2 = 0; d2 < d2Length; d2 = d2 + 4) {
let wValue0 = getW(wRPerm, wCPerm, d1, d2);
let wValue1 = getW(wRPerm, wCPerm, d1 + 1, d2);
let wValue2 = getW(wRPerm, wCPerm, d1 + 2, d2);
let wValue3 = getW(wRPerm, wCPerm, d1 + 3, d2);
var xValue = getDy(batch, idyR, idyC, d2);
let tmpval = vec4<f32>(dot(xValue, wValue0),
dot(xValue, wValue1),
dot(xValue, wValue2),
dot(xValue, wValue3));
dotProd[0] = dotProd[0] + tmpval;
}
} else if (bDyCVal2) {
let d2Length = uniforms.outBackprop[3];
for (var d2 = 0; d2 < d2Length; d2 = d2 + 4) {
let wValue0 = getW(wRPerm, wCPerm, d1, d2);
let wValue1 = getW(wRPerm, wCPerm, d1 + 1, d2);
let wValue2 = getW(wRPerm, wCPerm, d1 + 2, d2);
let wValue3 = getW(wRPerm, wCPerm, d1 + 3, d2);
var xValue = getDy(batch, idyR, idyC2, d2);
let tmpval = vec4<f32>(dot(xValue, wValue0),
dot(xValue, wValue1),
dot(xValue, wValue2),
dot(xValue, wValue3));
dotProd[1] = dotProd[1] + tmpval;
}
}
}
}
for (var i = 0; i < ${this.workPerThread}; i = i + 1) {
let coords = vec4<i32>(batch, r, c + i, d1);
if (coordsInBounds4D(coords, uniforms.outShape)) {
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], dotProd[i]);
}
}
}
`;return this.isVec4?`
${n}
`:`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-01-06 19:23:06 +01:00
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d1 = coords[${o}];
2023-08-05 15:03:11 +02:00
let dyCorner = vec2<i32>(coords[${t}], coords[${e}]) - uniforms.pads;
2023-01-06 19:23:06 +01:00
let dyRCorner = dyCorner.x;
let dyCCorner = dyCorner.y;
// Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
for (var wR = 0; wR < uniforms.filterDims.x; wR = wR + 1) {
2023-05-08 15:12:41 +02:00
let dyR = (f32(dyRCorner) + f32(wR)) / f32(uniforms.strides.x);
2023-01-06 19:23:06 +01:00
let wRPerm = uniforms.filterDims.x - 1 - wR;
if (dyR < 0.0 || dyR >= f32(uniforms.outBackprop[1]) || fract(dyR) > 0.0 ||
wRPerm < 0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims.y; wC = wC + 1) {
2023-05-08 15:12:41 +02:00
let dyC = (f32(dyCCorner) + f32(wC)) / f32(uniforms.strides.y);
2023-01-06 19:23:06 +01:00
let wCPerm = uniforms.filterDims.y - 1 - wC;
if (dyC < 0.0 || dyC >= f32(uniforms.outBackprop[2]) ||
fract(dyC) > 0.0 || wCPerm < 0) {
continue;
}
let idyC = i32(dyC);
for (var d2 = 0; d2 < uniforms.outBackprop[3]; d2 = d2 + 1) {
2023-05-08 15:12:41 +02:00
let xValue = ${this.isChannelsLast?"getDy(batch, idyR, idyC, d2)":"getDy(batch, d2, idyR, idyC)"};
let wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd = dotProd + xValue * wValue;
2023-01-06 19:23:06 +01:00
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}},Cx=class{constructor(t){this.variableNames=["x","dy"],this.uniforms="pads : vec2<i32>, strides : vec2<i32>, batchSize : i32, outHeight : i32, outWidth : i32, inHeight : i32, inWidth : i32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.filterShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.isChannelsLast=t.dataFormat==="channelsLast",this.shaderKey=`conv2DDerFilter_${this.isChannelsLast}`}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-01-06 19:23:06 +01:00
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let wR = coords[0];
let wC = coords[1];
let d1 = coords[2];
let d2 = coords[3];
// Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
for (var b = 0; b < uniforms.batchSize; b = b + 1) {
for (var yR = 0; yR < uniforms.outHeight; yR = yR + 1) {
2023-05-08 15:12:41 +02:00
let xR = wR + yR * uniforms.strides[0] - uniforms.pads[0];
2023-01-06 19:23:06 +01:00
if (xR < 0 || xR >= uniforms.inHeight) {
continue;
}
for (var yC = 0; yC < uniforms.outWidth; yC = yC + 1) {
2023-05-08 15:12:41 +02:00
let xC = wC + yC * uniforms.strides[1] - uniforms.pads[1];
2023-01-06 19:23:06 +01:00
if (xC < 0 || xC >= uniforms.inWidth) {
continue;
}
if (${this.isChannelsLast}) {
let dyValue = getDy(b, yR, yC, d2);
let xValue = getX(b, xR, xC, d1);
dotProd = dotProd + xValue * dyValue;
} else {
let dyValue = getDy(b, d2, yR, yC);
let xValue = getX(b, d1, xR, xC);
dotProd = dotProd + xValue * dyValue;
}
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}},wx=class{constructor(t){this.variableNames=["x","dy"],this.uniforms=`pads : vec3<i32>, strides : vec3<i32>, batchSize : i32, outDepth : i32,
2023-08-05 15:03:11 +02:00
outHeight : i32, outWidth : i32, inDepth : i32, inHeight : i32, inWidth : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.filterShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="conv3DDerFilter"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let wF = coords.x;
let wR = coords.y;
let wC = coords.z;
let d1 = coords.w;
let d2 = coords.u;
var dotProd = 0.0;
for (var b = 0; b < uniforms.batchSize; b++) {
for (var yF = 0; yF < uniforms.outDepth; yF++) {
let xF = wF + yF * uniforms.strides[0] - uniforms.pads[0];
if (xF < 0 || xF >= uniforms.inDepth) {
continue;
}
for (var yR = 0; yR < uniforms.outHeight; yR++) {
let xR = wR + yR * uniforms.strides[1] - uniforms.pads[1];
if (xR < 0 || xR >= uniforms.inHeight) {
continue;
}
for (var yC = 0; yC < uniforms.outWidth; yC++) {
let xC = wC + yC * uniforms.strides[2] - uniforms.pads[2];
if (xC < 0 || xC >= uniforms.inWidth) {
continue;
}
let dyValue = getDy(b, yF, yR, yC, d2);
let xValue = getX(b, xF, xR, xC, d1);
dotProd += xValue * dyValue;
}
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}},Sx=class{constructor(t){this.variableNames=["dy","W"],this.uniforms=`filterDims : vec3<i32>, pads : vec3<i32>, strides : vec3<i32>,
2023-08-05 15:03:11 +02:00
outDepth : i32, outHeight : i32, outWidth : i32, outChannels : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.inShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="conv3DDerInput"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords.x;
let d1 = coords.u;
let dyCorner = vec3<i32>(coords.y, coords.z, coords.w) - uniforms.pads;
let dyFCorner = dyCorner.x;
let dyRCorner = dyCorner.y;
let dyCCorner = dyCorner.z;
var dotProd = 0.0;
for (var wF = 0; wF < uniforms.filterDims[0]; wF++) {
let dyF = f32(dyFCorner + wF) / f32(uniforms.strides[0]);
if (dyF < 0.0 || dyF >= f32(uniforms.outDepth) || fract(dyF) > 0.0) {
continue;
}
let idyF = i32(dyF);
let wFPerm = uniforms.filterDims[0] - 1 - wF;
for (var wR = 0; wR < uniforms.filterDims[1]; wR++) {
let dyR = f32(dyRCorner + wR) / f32(uniforms.strides[1]);
if (dyR < 0.0 || dyR >= f32(uniforms.outHeight) || fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
let wRPerm = uniforms.filterDims[1] - 1 - wR;
for (var wC = 0; wC < uniforms.filterDims[2]; wC++) {
let dyC = f32(dyCCorner + wC) / f32(uniforms.strides[2]);
if (dyC < 0.0 || dyC >= f32(uniforms.outWidth) || fract(dyC) > 0.0) {
continue;
}
let idyC = i32(dyC);
let wCPerm = uniforms.filterDims[2] - 1 - wC;
for (var d2 = 0; d2 < uniforms.outChannels; d2++) {
let xValue = getDy(batch, idyF, idyR, idyC, d2);
let wValue = getW(wFPerm, wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
}
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}};function Iue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,dy:s}=t,{strides:a,pad:i,dataFormat:p,dimRoundingMode:u,filterShape:c}=o,l=w.convertConv2DDataFormat(p),m=w.computeConv2DInfo(n.shape,c,a,1,i,u,!1,l),d=new Cx(m),f=[{type:"int32",data:[m.padInfo.top,m.padInfo.left]},{type:"int32",data:[m.strideHeight,m.strideWidth]},{type:"int32",data:[m.batchSize]},{type:"int32",data:[m.outHeight]},{type:"int32",data:[m.outWidth]},{type:"int32",data:[m.inHeight]},{type:"int32",data:[m.inWidth]}];return e.runWebGPUProgram(d,[n,s],n.dtype,f)}var IV={kernelName:Ai,backendName:"webgpu",kernelFunc:Iue};function vue(r=4){let t=s=>{switch(s){case 1:return"return W[getIndexFromCoords4D(coord, uniforms.wShape)];";case 4:return`
2022-11-18 17:13:29 +01:00
let coord1 = vec4<i32>(coordX, coordY, col + 1, rowInner);
let coord2 = vec4<i32>(coordX, coordY, col + 2, rowInner);
let coord3 = vec4<i32>(coordX, coordY, col + 3, rowInner);
let v0 = W[getIndexFromCoords4D(coord, uniforms.wShape)];
let v1 = W[getIndexFromCoords4D(coord1, uniforms.wShape)];
let v2 = W[getIndexFromCoords4D(coord2, uniforms.wShape)];
let v3 = W[getIndexFromCoords4D(coord3, uniforms.wShape)];
return vec4<f32>(v0, v1, v2, v3);
`;default:throw new Error(`innerElementSize ${s} is not supported.`)}},o=`if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
${`
let outRow = row / uniforms.outShape[2];
let outCol = row % uniforms.outShape[2];
let WRow = col / (uniforms.filterDims[1] * uniforms.outBackprop[3]);
let WCol = col / uniforms.outBackprop[3] % uniforms.filterDims[1];
2023-05-08 15:12:41 +02:00
let xR = f32(outRow - uniforms.pads[0] + WRow) / f32(uniforms.strides[0]);
let xC = f32(outCol - uniforms.pads[1] + WCol) / f32(uniforms.strides[1]);
2022-11-18 17:13:29 +01:00
if (xR < 0.0 || xR >= f32(uniforms.outBackprop[1]) || fract(xR) > 0.0) {
2023-05-08 15:12:41 +02:00
return ${Ae(r)}(0.0);
2022-11-18 17:13:29 +01:00
}
if (xC < 0.0 || xC >= f32(uniforms.outBackprop[2]) || fract(xC) > 0.0) {
2023-05-08 15:12:41 +02:00
return ${Ae(r)}(0.0);
2022-11-18 17:13:29 +01:00
}
let coord = vec4<i32>(
batch,
i32(xR),
i32(xC),
col % uniforms.outBackprop[3]);
return x[getIndexFromCoords4D(coord, uniforms.xShape)/${r}];`}
}
2023-05-08 15:12:41 +02:00
return ${Ae(r)}(0.0);`;return`
2023-08-05 15:03:11 +02:00
fn mm_readA(batch: i32, row : i32, col : i32) -> ${Ae(r)} {
2022-11-18 17:13:29 +01:00
${o}
}
2023-08-05 15:03:11 +02:00
fn mm_readB(batch: i32, row : i32, col : i32) -> ${Ae(r)} {
2022-11-18 17:13:29 +01:00
let coordX = uniforms.filterDims.x - 1 -
row / (uniforms.filterDims[1] * uniforms.outBackprop[3]);
let coordY = uniforms.filterDims.y - 1 -
(row / uniforms.outBackprop[3]) % uniforms.filterDims[1];
if (row < uniforms.dimInner && col < uniforms.dimBOuter &&
coordX >= 0 && coordY >= 0) {
let rowInner = row % uniforms.outBackprop[3];
let coord = vec4<i32>(coordX, coordY, col, rowInner);
2023-08-05 15:03:11 +02:00
${t(r)}
2022-11-18 17:13:29 +01:00
}
2023-05-08 15:12:41 +02:00
return ${Ae(r)}(0.0);
2022-11-18 17:13:29 +01:00
}
2023-08-05 15:03:11 +02:00
fn mm_write(batch: i32, row : i32, col : i32, valueInput : ${Ae(r)}) {
if (row < uniforms.dimAOuter && col < uniforms.dimBOuter) {
2022-11-18 17:13:29 +01:00
var value = valueInput;
let outCoord = vec4<i32>(
batch,
row / uniforms.outShape[2],
row % uniforms.outShape[2],
col);
result[getIndexFromCoords4D(outCoord, uniforms.outShape)/${r}] = value;
}
}`}var Ix=class{constructor(t){this.variableNames=["x","W"],this.uniforms="filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, outBackprop : vec4<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=t.inShape,y.assert(t.dataFormat==="channelsLast",()=>"TODO: NCHW is unimplemented"),this.isVec4=t.inChannels%4===0&&t.outChannels%4===0,this.dispatchLayout={x:[3],y:[1,2],z:[0]},this.workgroupSize=pm(this.dispatchLayout,this.outputShape,this.isVec4),this.elementsPerThread=cm(this.dispatchLayout,this.outputShape,this.isVec4),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,this.elementsPerThread),this.isVec4&&(this.outputComponent=4,this.variableComponents=[4,1]),this.shaderKey=`conv2DDerInputMM_${this.isVec4}_${this.elementsPerThread}`}getUserCode(){let t=this.isVec4?Np(this.elementsPerThread,this.workgroupSize):Tp(this.elementsPerThread,this.workgroupSize);return`
2023-09-18 18:44:36 +02:00
${vue(this.isVec4?4:1)}
2023-08-05 15:03:11 +02:00
${t}
`}};function kue(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,filter:s}=t,{inputShape:a,strides:i,pad:p,dataFormat:u,dimRoundingMode:c}=o,l=w.convertConv2DDataFormat(u),m=w.computeConv2DInfo(a,s.shape,i,1,p,c,!1,l),d=[{type:"int32",data:[m.filterHeight,m.filterWidth]},{type:"int32",data:[m.filterHeight-1-m.padInfo.top,m.filterWidth-1-m.padInfo.left]},{type:"int32",data:[m.strideHeight,m.strideWidth]},{type:"int32",data:[m.batchSize,m.outHeight,m.outWidth,m.outChannels]}],f;if(A().getBool("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE")||m.dataFormat!=="channelsLast")f=new bx(m);else{f=new Ix(m);let h=m.inHeight*m.inWidth,g=m.inChannels,x=m.filterHeight*m.filterWidth*m.outChannels;d.push({type:"uint32",data:[h]},{type:"uint32",data:[g]},{type:"uint32",data:[x]})}return e.runWebGPUProgram(f,[n,s],"float32",d)}var vV={kernelName:rn,backendName:"webgpu",kernelFunc:kue};var vx=class{constructor(t){this.variableNames=["x","W"],this.uniforms="filterDims: vec3<i32>, pads: vec3<i32>, strides: vec3<i32>, dilations: vec3<i32>,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.outShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="conv3dnaive"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
2023-05-08 15:12:41 +02:00
let coords = getOutputCoords();
let batch = coords.x;
let d2 = coords.u;
let xFRCCorner = vec3<i32>(coords.y, coords.z, coords.w) * uniforms.strides - uniforms.pads;
let xFCorner = xFRCCorner.x;
let xRCorner = xFRCCorner.y;
let xCCorner = xFRCCorner.z;
let inputDepthNearestVec4 = (uniforms.xShape.u / 4) * 4;
let inputDepthVec4Remainder = uniforms.xShape.u % 4;
var dotProd = 0.0;
for (var wF = 0; wF < uniforms.filterDims[0]; wF++) {
let xF = xFCorner + wF * uniforms.dilations[0];
if (xF < 0 || xF >= uniforms.xShape.y) {
continue;
}
for (var wR = 0; wR < uniforms.filterDims[1]; wR++) {
let xR = xRCorner + wR * uniforms.dilations[1];
if (xR < 0 || xR >= uniforms.xShape.z) {
continue;
}
for (var wC = 0; wC < uniforms.filterDims[2]; wC++) {
let xC = xCCorner + wC * uniforms.dilations[2];
if (xC < 0 || xC >= uniforms.xShape.w) {
continue;
}
for (var d1 = 0; d1 < inputDepthNearestVec4; d1 += 4) {
let xValues = vec4<f32>(
getX(batch, xF, xR, xC, d1),
getX(batch, xF, xR, xC, d1 + 1),
getX(batch, xF, xR, xC, d1 + 2),
getX(batch, xF, xR, xC, d1 + 3)
);
let wValues = vec4<f32>(
getW(wF, wR, wC, d1, d2),
getW(wF, wR, wC, d1 + 1, d2),
getW(wF, wR, wC, d1 + 2, d2),
getW(wF, wR, wC, d1 + 3, d2)
);
dotProd += dot(xValues, wValues);
}
if (inputDepthVec4Remainder == 1) {
dotProd += getX(batch, xF, xR, xC, inputDepthNearestVec4) *
getW(wF, wR, wC, inputDepthNearestVec4, d2);
} else if (inputDepthVec4Remainder == 2) {
let xValues = vec2<f32>(
getX(batch, xF, xR, xC, inputDepthNearestVec4),
getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1)
);
let wValues = vec2<f32>(
getW(wF, wR, wC, inputDepthNearestVec4, d2),
getW(wF, wR, wC, inputDepthNearestVec4 + 1, d2)
);
dotProd += dot(xValues, wValues);
} else if (inputDepthVec4Remainder == 3) {
let xValues = vec3<f32>(
getX(batch, xF, xR, xC, inputDepthNearestVec4),
getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1),
getX(batch, xF, xR, xC, inputDepthNearestVec4 + 2)
);
let wValues = vec3<f32>(
getW(wF, wR, wC, inputDepthNearestVec4, d2),
getW(wF, wR, wC, inputDepthNearestVec4 + 1, d2),
getW(wF, wR, wC, inputDepthNearestVec4 + 2, d2)
);
dotProd += dot(xValues, wValues);
}
}
}
}
setOutputAtIndex(index, dotProd);
}
}`}};function Nue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s}=t,{strides:a,pad:i,dilations:p}=o,u=w.computeConv3DInfo(n.shape,s.shape,a,p,i),c=[u.padInfo.front,u.padInfo.top,u.padInfo.left],l=[{type:"int32",data:[u.filterDepth,u.filterHeight,u.filterWidth]},{type:"int32",data:[...c]},{type:"int32",data:[u.strideDepth,u.strideHeight,u.strideWidth]},{type:"int32",data:[u.dilationDepth,u.dilationHeight,u.dilationWidth]}],m=new vx(u),d=dt(n.dtype,s.dtype);return e.runWebGPUProgram(m,[n,s],d,l)}var kV={kernelName:on,backendName:"webgpu",kernelFunc:Nue};function Tue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,dy:s}=t,{strides:a,pad:i,filterShape:p}=o,u=w.computeConv3DInfo(n.shape,p,a,1,i),c=new wx(u),l=[{type:"int32",data:[u.padInfo.front,u.padInfo.top,u.padInfo.left]},{type:"int32",data:[u.strideDepth,u.strideHeight,u.strideWidth]},{type:"int32",data:[u.batchSize]},{type:"int32",data:[u.outDepth]},{type:"int32",data:[u.outHeight]},{type:"int32",data:[u.outWidth]},{type:"int32",data:[u.inDepth]},{type:"int32",data:[u.inHeight]},{type:"int32",data:[u.inWidth]}];return e.runWebGPUProgram(c,[n,s],s.dtype,l)}var NV={kernelName:ja,backendName:"webgpu",kernelFunc:Tue};function _ue(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,filter:s}=t,{strides:a,pad:i,inputShape:p}=o,u=w.computeConv3DInfo(p,s.shape,a,1,i),c=new Sx(u),l=[{type:"int32",data:[u.filterDepth,u.filterHeight,u.filterWidth]},{type:"int32",data:[u.filterDepth-1-u.padInfo.front,u.filterHeight-1-u.padInfo.top,u.filterWidth-1-u.padInfo.left]},{type:"int32",data:[u.strideDepth,u.strideHeight,u.strideWidth]},{type:"int32",data:[u.outDepth]},{type:"int32",data:[u.outHeight]},{type:"int32",data:[u.outWidth]},{type:"int32",data:[u.outChannels]}];return e.runWebGPUProgram(c,[n,s],n.dtype,l)}var TV={kernelName:nn,backendName:"webgpu",kernelFunc:_ue};var $ue=ye({opType:Z.COS}),_V={kernelName:sn,backendName:"webgpu",kernelFunc:$ue};var Eue=ye({opType:Z.COSH}),$V={kernelName:an,backendName:"webgpu",kernelFunc:Eue};var 
kx=class{constructor(t,e,o,n){this.variableNames=["Image","Boxes","BoxInd"],this.uniforms="extrapolationValue : f32,",this.workgroupSize=[64,1,1],this.size=!0;let[s]=e;this.outputShape=[s,o[0],o[1],t],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.methodId=n==="bilinear"?1:0,this.cropHeightBiggerThan1=this.outputShape[1]>1,this.cropWidthBiggerThan1=this.outputShape[2]>1,this.shaderKey=`cropAndResize_${this.methodId}_${this.cropHeightBiggerThan1}_${this.cropWidthBiggerThan1}`}getUserCode(){let[t,e]=["f32(uniforms.imageShape[1] - 1)","f32(uniforms.imageShape[2] - 1)"],[o,n,s]=this.cropHeightBiggerThan1?[`(${t} / f32(uniforms.outShape[1] - 1))`,"(y2-y1) * height_ratio",`y1*${t} + f32(y)*(height_scale)`]:["0.0","0.0",`0.5 * (y1+y2) * ${t}`],[a,i,p]=this.cropWidthBiggerThan1?[`(${e} / f32(uniforms.outShape[2] - 1))`,"(x2-x1) * width_ratio",`x1*${e} + f32(x)*(width_scale)`]:["0.0","0.0",`0.5 * (x1+x2) * ${e}`];return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let height_ratio = f32(${o});
let width_ratio = f32(${a});
let b = coords[0];
let y = coords[1];
let x = coords[2];
let d = coords[3];
// get box vals
let y1 = getBoxes(b, 0);
let x1 = getBoxes(b, 1);
let y2 = getBoxes(b, 2);
let x2 = getBoxes(b, 3);
// get image in batch index
let bInd = i32(round(getBoxInd(b)));
if(bInd < 0 || bInd >= uniforms.outShape[0]) {
return;
}
let height_scale = ${n};
2022-11-18 17:13:29 +01:00
let width_scale = ${i};
let in_y = ${s};
2023-08-05 15:03:11 +02:00
if( in_y < 0.0 || in_y > ${t} ) {
2022-11-18 17:13:29 +01:00
setOutputAtIndex(index, uniforms.extrapolationValue);
return;
}
let in_x = ${p};
2023-08-05 15:03:11 +02:00
if( in_x < 0.0 || in_x > ${e} ) {
2022-11-18 17:13:29 +01:00
setOutputAtIndex(index, uniforms.extrapolationValue);
return;
}
let sourceFracIndexCR = vec2<f32>(in_x,in_y);
if(${this.methodId} == 1) {
// Compute the four integer indices.
let sourceFloorCR = vec2<i32>(sourceFracIndexCR);
let sourceCeilCR = vec2<i32>(ceil(sourceFracIndexCR));
let topLeft = getImage(bInd, sourceFloorCR.y, sourceFloorCR.x, d);
let bottomLeft = getImage(bInd, sourceCeilCR.y, sourceFloorCR.x, d);
let topRight = getImage(bInd, sourceFloorCR.y, sourceCeilCR.x, d);
let bottomRight = getImage(bInd, sourceCeilCR.y, sourceCeilCR.x, d);
let fracCR = sourceFracIndexCR - vec2<f32>(sourceFloorCR);
let top = topLeft + (topRight - topLeft) * fracCR.x;
let bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR.x;
let newValue = top + (bottom - top) * fracCR.y;
setOutputAtIndex(index, newValue);
} else {
// Compute the coordinators of nearest neighbor point.
let sourceNearestCR = vec2<i32>(floor(
sourceFracIndexCR + vec2<f32>(0.5,0.5)));
let newValue = getImage(
bInd, sourceNearestCR.y, sourceNearestCR.x, d);
setOutputAtIndex(index, newValue);
}
}
}
`}};var Rue=r=>{let{inputs:t,backend:e,attrs:o}=r,{image:n,boxes:s,boxInd:a}=t,{cropSize:i,method:p,extrapolationValue:u}=o,c=new kx(n.shape[3],s.shape,i,p),l=[{type:"float32",data:[u]}];return e.runWebGPUProgram(c,[n,s,a],"float32",l)},EV={kernelName:cn,backendName:"webgpu",kernelFunc:Rue};var Ep;(function(r){r.Prod="*",r.Sum="+"})(Ep||(Ep={}));var hm=class{constructor(t,e,o,n){this.variableNames=["x"],this.uniforms="index : f32,",this.size=!0,this.workgroupSize=[128,1,1],this.outputShape=e,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.exclusive=o,this.reverse=n,this.op=t,this.shaderKey=`cum_${this.op}_${this.exclusive}_${this.reverse}`}getUserCode(){let t=this.outputShape.length,e=this.op===Ep.Prod?"1.0":"0.0",o=this.exclusive?e:`getX(${RV(t,"coords",this.op)})`,n=this.outputShape[this.outputShape.length-1],s="",a="";return this.exclusive?(s=this.reverse?`end != ${n-1}`:"end != 0",a=this.reverse?"end + 1":"end - 1"):(s=this.reverse?`end + pow2 < ${n}`:"end >= pow2",a=this.reverse?"end + pow2":"end - pow2"),`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
var coords = getCoordsFromIndex(index);
let end = ${DV(t,"coords",this.op)};
2022-11-18 17:13:29 +01:00
var val = ${o};
let pow2 = i32(pow(2.0, uniforms.index));
if (${s}) {
let idx = ${a};
${DV(t,"coords",this.op)} = idx;
val ${this.op}= getX(${RV(t,"coords",this.op)});
2022-11-18 17:13:29 +01:00
}
setOutputAtIndex(index, val);
}
}
`}};function RV(r,t,e){if(r===1)return`${t}`;if(r===2)return`${t}.x, ${t}.y`;if(r===3)return`${t}.x, ${t}.y, ${t}.z`;if(r===4)return`${t}.x, ${t}.y, ${t}.z, ${t}.w`;throw Error(`Cumulative ${e} for rank ${r} is not yet supported`)}function DV(r,t,e){if(r===1)return`${t}`;if(r===2)return`${t}.y`;if(r===3)return`${t}.z`;if(r===4)return`${t}.w`;throw Error(`Cumulative ${e} for rank ${r} is not yet supported`)}function Nx(r,t,e,o,n,s){let a=t.shape.length,i=w.getAxesPermutation([o],a),p=t;i!=null&&(p=xr({inputs:{x:t},backend:e,attrs:{perm:i}}));let u=w.getInnerMostAxes(1,a)[0];if(u!==a-1)throw new Error(`WebGPU cumprod shader expects an inner-most axis=${t.shape.length-1} but got axis=${o}`);let c=p.shape[u],l=At({inputs:{x:p},backend:e});for(let m=0;m<=Math.ceil(Math.log2(c))-1;m++){let d=new hm(r,p.shape,!1,s),f=l,h=[{type:"float32",data:[m]}];l=e.runWebGPUProgram(d,[l],l.dtype,h),e.disposeData(f.dataId)}if(n){let m=new hm(r,p.shape,n,s),d=l,f=[{type:"float32",data:[0]}];l=e.runWebGPUProgram(m,[l],l.dtype,f),e.disposeData(d.dataId)}if(i!=null){let m=w.getUndoAxesPermutation(i),d=xr({inputs:{x:l},backend:e,attrs:{perm:m}});return e.disposeData(l.dataId),e.disposeData(p.dataId),d}return l}function Due(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,exclusive:a,reverse:i}=o;return Nx(Ep.Prod,n,e,s,a,i)}var AV={kernelName:un,backendName:"webgpu",kernelFunc:Due};function Aue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,exclusive:a,reverse:i}=o;return Nx(Ep.Sum,n,e,s,a,i)}var FV={kernelName:pn,backendName:"webgpu",kernelFunc:Aue};function Fue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,weights:s}=t,{size:a,binaryOutput:i}=o,p=n.shape.length===1,c=y.sizeFromShape(s.shape)>0,l=s.dtype,m=p?[n.shape[0]]:[n.shape[0],n.shape[1]],d=p?[a]:[n.shape[0],a],f=vt({backend:e,attrs:{shape:d,value:0,dtype:l}}),h=new Xc(m,c,i),g=[{type:"int32",data:[a]}],x=c?[n,s]:[n];return e.runWebGPUProgram(h,x,l,g,f)}var PV={kernelName:ra,backendName:"webgpu",kernelFunc:Fue};var 
Tx=class{constructor(t,e){this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.uniforms="blockSize : i32,",this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey=`depthToSpace_${e}`,this.dataFormat=e}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let b = coords[0];
let h = ${this.getHeightCoordString()};
let w = ${this.getWidthCoordString()};
let d = ${this.getDepthCoordString()};
let in_h = h / uniforms.blockSize;
let offset_h = h % uniforms.blockSize;
let in_w = w / uniforms.blockSize;
let offset_w = w % uniforms.blockSize;
let offset_d = (offset_h * uniforms.blockSize + offset_w) *
${this.getOutputDepthSize()};
let in_d = d + offset_d;
let rlt = ${this.getInputSamplingString()};
setOutputAtIndex(index, rlt);
}
}`}getHeightCoordString(){return this.dataFormat==="NHWC"?"coords[1]":"coords[2]"}getWidthCoordString(){return this.dataFormat==="NHWC"?"coords[2]":"coords[3]"}getDepthCoordString(){return this.dataFormat==="NHWC"?"coords[3]":"coords[1]"}getOutputDepthSize(){return this.dataFormat==="NHWC"?"uniforms.outShape[3]":"uniforms.outShape[1]"}getInputSamplingString(){return this.dataFormat==="NHWC"?"getX(b, in_h, in_w, in_d)":"getX(b, in_d, in_h, in_w)"}};function Pue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{blockSize:s,dataFormat:a}=o,i=n.shape[0],p=a==="NHWC"?n.shape[1]:n.shape[2],u=a==="NHWC"?n.shape[2]:n.shape[3],c=a==="NHWC"?n.shape[3]:n.shape[1],l=p*s,m=u*s,d=c/(s*s),f=a==="NHWC"?[i,l,m,d]:[i,d,l,m],h=[{type:"int32",data:[s]}],g=new Tx(f,a);return e.runWebGPUProgram(g,[n],n.dtype,h)}var OV={kernelName:ln,backendName:"webgpu",kernelFunc:Pue};var _x=class{constructor(t,e,o,n=!1,s=null,a=!1){this.variableNames=["x","W"],this.uniforms="pads : vec2<i32>, inDims : vec2<i32>,",this.workgroupSize=[16,16,1],this.outputShape=t,this.dispatchLayout={x:[3],y:[2],z:[0,1]},this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),n&&this.variableNames.push("bias"),a&&this.variableNames.push("preluActivationWeights"),this.addBias=n,this.activation=s,this.hasPreluActivation=a,this.filterHeight=e,this.filterWidth=o,this.shaderKey=`depthwiseNCHW_${this.activation}_${this.filterHeight}_${this.filterWidth}`}getUserCode(){let t=this.filterWidth*this.filterHeight,e=this.workgroupSize[0]*this.workgroupSize[1]*this.workgroupSize[2],o=this.workgroupSize[1]+this.filterHeight-1,n=this.workgroupSize[0]+this.filterWidth-1;return`
${dr(this.activation,this.hasPreluActivation,!1,4)}
2022-11-18 17:13:29 +01:00
var<workgroup> mm_Asub : array<array<f32, ${n}>, ${o}>;
var<workgroup> mm_Bsub : array<array<f32, ${this.filterWidth}>, ${this.filterHeight}>;
fn readX(batch : i32, channel : i32, row : i32, col : i32) -> f32 {
var value = 0.0;
if (row >=0 && row < uniforms.inDims[0] && col >=0 && col < uniforms.inDims[1])
{
value = getX(batch, channel, row, col);
}
return value;
}
2023-08-05 15:03:11 +02:00
${G()} {
2022-11-18 17:13:29 +01:00
let coords = getOutputCoords();
let batch = coords[0];
2023-05-08 15:12:41 +02:00
let xRCCorner = vec2<i32>(coords.zw) - uniforms.pads;
2022-11-18 17:13:29 +01:00
let channelMul = uniforms.wShape[3];
let d1 = coords[1] / channelMul;
let q = coords[1] % channelMul;
let inputRowStart = xRCCorner.x;
let inputColStart = xRCCorner.y;
let localRow = i32(localId.y);
let localCol = i32(localId.x);
// Load one tile of X into local memory.
2022-11-20 22:20:02 +01:00
for (var inputRow = localRow; inputRow < ${o}; inputRow = inputRow + ${this.workgroupSize[1]}) {
for (var inputCol = localCol; inputCol < ${n}; inputCol = inputCol + ${this.workgroupSize[0]}) {
2022-11-18 17:13:29 +01:00
let rowOffset = inputRow - localRow;
let colOffset = inputCol - localCol;
mm_Asub[inputRow][inputCol] = readX(batch, d1, inputRowStart + rowOffset, inputColStart + colOffset);
}
}
// Load one tile of W into local memory.
2022-11-20 22:20:02 +01:00
var wIndex = i32(localIndex);
2023-08-05 15:03:11 +02:00
${t<e?`if (wIndex < ${t})`:`for(; wIndex < ${t}; wIndex = wIndex + ${e})`}
2022-11-18 17:13:29 +01:00
{
let wRow = wIndex / ${this.filterWidth};
let wCol = wIndex % ${this.filterWidth};
mm_Bsub[wRow][wCol] = getW(wRow, wCol, d1, q);
}
workgroupBarrier();
var value = 0.0;
for (var wR = 0; wR < ${this.filterHeight}; wR = wR + 1) {
for (var wC = 0; wC < ${this.filterWidth}; wC = wC + 1) {
let xVal = mm_Asub[localRow + wR][localCol + wC];
let wVal = mm_Bsub[wR][wC];
value = fma(xVal, wVal, value);
}
}
${Qr(this.addBias,this.activation)}
2022-11-18 17:13:29 +01:00
if (coordsInBounds4D(coords, uniforms.outShape)) {
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
`}};var Qc=class{constructor(t,e=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms="pads : vec2<i32>, inDims : vec2<i32>, virtualWidth : i32,",this.workgroupSize=[64,1,1],this.workPerThread=4,this.outputComponent=4,this.outputShape=t.outShape,this.virtualWidth=Math.ceil(this.outputShape[2]/this.workPerThread)*this.workPerThread;let s=[this.outputShape[0],this.outputShape[1],this.virtualWidth,this.outputShape[3]];this.dispatchLayout=X(s),this.dispatch=H(this.dispatchLayout,s,this.workgroupSize,[this.outputComponent*this.workPerThread,1,1]),y.assert(t.dataFormat==="channelsLast",()=>"TODO: NCHW is unimplemented"),e&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.convInfo=t,this.addBias=e,this.activation=o,this.hasPreluActivation=n,this.shaderKey=`depthwiseVec4_${o}_${this.convInfo.filterHeight}_${this.convInfo.filterWidth}_${this.convInfo.strideHeight}_${this.convInfo.strideWidth}_${this.workPerThread}`}getUserCode(){let t=(this.workPerThread-1)*this.convInfo.strideWidth+this.convInfo.filterWidth,e=this.convInfo.strideHeight,o=this.convInfo.strideWidth;return`
${dr(this.activation,this.hasPreluActivation,!0,4)}
2022-11-18 17:13:29 +01:00
fn readX(batch : i32, row : i32, col : i32, channel : i32) -> vec4<f32> {
var value = vec4<f32>(0.0);
if (col >=0 && col < uniforms.inDims[1]) {
value = getX(batch, row, col, channel);
}
return value;
}
2023-08-05 15:03:11 +02:00
${G("index")} {
let width0 = uniforms.outShape[3] / ${this.outputComponent};
let d1 = (index % width0) * ${this.outputComponent};
var index1 = index / width0;
let width1 = uniforms.virtualWidth / ${this.workPerThread};
let c = (index1 % width1) * ${this.workPerThread};
index1 = index1 / width1;
let r = index1 % uniforms.outShape[1];
let batch = index1 / uniforms.outShape[1];
2023-08-05 15:03:11 +02:00
let xRCCorner = vec2<i32>(r, c) * vec2<i32>(${e}, ${o}) - uniforms.pads;
2022-11-18 17:13:29 +01:00
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
2023-08-05 15:03:11 +02:00
var xVals : array<vec4<f32>, ${t}>;
2022-11-18 17:13:29 +01:00
var dotProd : array<vec4<f32>, ${this.workPerThread}>;
for (var i = 0; i < ${this.workPerThread}; i++) {
dotProd[i] = vec4<f32>(0.0);
}
// Use constant instead of uniform can give better performance.
for (var wR = 0; wR < ${this.convInfo.filterHeight}; wR = wR + 1) {
let xR = xRCorner + wR;
if (xR >=0 && xR < uniforms.inDims[0]) {
2023-08-05 15:03:11 +02:00
for (var i = 0; i < ${t}; i++) {
2022-11-18 17:13:29 +01:00
xVals[i] = readX(batch, xR, xCCorner + i, d1);
}
for (var wC = 0; wC < ${this.convInfo.filterWidth}; wC = wC + 1) {
let wValue = getW(wR, wC, d1, 0);
for (var i = 0; i < ${this.workPerThread}; i++) {
2023-01-06 19:23:06 +01:00
dotProd[i] = fma(xVals[i * ${o} + wC], wValue, dotProd[i]);
2022-11-18 17:13:29 +01:00
}
}
}
}
for (var i = 0; i < ${this.workPerThread}; i = i + 1) {
let coords = vec4<i32>(batch, r, c + i, d1);
if (coordsInBounds4D(coords, uniforms.outShape)) {
var value = dotProd[i];
${Qr(this.addBias,this.activation)}
2022-11-18 17:13:29 +01:00
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
}
`}};
// Zc: WebGPU program descriptor for the generic depthwise conv2d shader (shaderKey
// "depthwise_<activation>_<isChannelsLast>"). Unlike Qc, filter size, strides and
// dilations are supplied as uniforms, and the input read is chosen per data format
// in getUserCode(). Constructor args: t - conv info, e - add-bias flag,
// o - activation name or null, n - has-prelu-weights flag.
// The uniforms template literal opened at the end of this line continues below.
var Zc=class{constructor(t,e=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms=`pads : vec2<i32>, inDims : vec2<i32>, filterHeight : i32,
2023-08-05 15:03:11 +02:00
filterWidth : i32, strides : vec2<i32>, dilations : vec2<i32>,`,this.workgroupSize=[256,1,1],this.size=!0,this.outputShape=t.outShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.isChannelsLast=t.dataFormat==="channelsLast",e&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.convInfo=t,this.addBias=e,this.activation=o,this.hasPreluActivation=n,this.shaderKey=`depthwise_${this.activation}_${this.isChannelsLast}`}getUserCode(){let t=this.isChannelsLast?"getX(batch, xR, xC, d1);":"getX(batch, d1, xR, xC);";return`
${dr(this.activation,this.hasPreluActivation,!1,4)}
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-20 22:20:02 +01:00
if (index < uniforms.size) {
let coords = getOutputCoords();
let batch = coords[0];
2023-05-08 15:12:41 +02:00
let xRCCorner = vec2<i32>(coords.${this.isChannelsLast?"yz":"zw"}) * uniforms.strides - uniforms.pads;
2022-11-20 22:20:02 +01:00
let d2 = coords[${this.isChannelsLast?3:1}];
let channelMul = uniforms.wShape[3];
let d1 = d2 / channelMul;
let q = d2 % channelMul;
let inputRowStart = xRCCorner.x;
let inputColStart = xRCCorner.y;
let inputRowEnd = inputRowStart + uniforms.filterHeight *
2023-05-08 15:12:41 +02:00
uniforms.dilations[0];
2022-11-20 22:20:02 +01:00
let inputColEnd = inputColStart + uniforms.filterWidth *
2023-05-08 15:12:41 +02:00
uniforms.dilations[1];
2022-11-20 22:20:02 +01:00
// Convolve x(?, ?, d1)|x(d1, ?, ?) with w(:, :, d1, q) to get
// y(yR, yC, d2)|y(d2, yR, yC). ? = to be determined. : = across all
// values in that axis. x(?, ?, d1) and y(yR, yC, d2) is for NHWC.
// x(d1, ?, ?) and y(d2, yR, yC) is for NCHW.
var value = 0.0;
// Extract if checking out of for loop for performance.
if (inputRowStart >= 0 && inputColStart >= 0 &&
inputRowEnd < uniforms.inDims[0] &&
inputColEnd < uniforms.inDims[1]) {
for (var wR = 0; wR < uniforms.filterHeight; wR = wR + 1) {
2023-05-08 15:12:41 +02:00
let xR = inputRowStart + wR * uniforms.dilations[0];
2022-11-20 22:20:02 +01:00
for (var wC = 0; wC < uniforms.filterWidth; wC = wC + 1) {
2023-05-08 15:12:41 +02:00
let xC = inputColStart + wC * uniforms.dilations[1];
2022-11-20 22:20:02 +01:00
2023-08-05 15:03:11 +02:00
let xVal = ${t};
2022-11-20 22:20:02 +01:00
let wVal = getW(wR, wC, d1, q);
value = value + xVal * wVal;
}
}
} else {
for (var wR = 0; wR < uniforms.filterHeight; wR = wR + 1) {
2023-05-08 15:12:41 +02:00
let xR = inputRowStart + wR * uniforms.dilations[0];
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
if (xR < 0 || xR >= uniforms.inDims[0]) {
continue;
}
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
for (var wC = 0; wC < uniforms.filterWidth; wC = wC + 1) {
2023-05-08 15:12:41 +02:00
let xC = inputColStart + wC * uniforms.dilations[1];
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
if (xC < 0 || xC >= uniforms.inDims[1]) {
continue;
}
2022-11-18 17:13:29 +01:00
2023-08-05 15:03:11 +02:00
let xVal = ${t};
2022-11-20 22:20:02 +01:00
let wVal = getW(wR, wC, d1, q);
value = value + xVal * wVal;
}
2022-11-18 17:13:29 +01:00
}
}
${Qr(this.addBias,this.activation)}
2022-11-20 22:20:02 +01:00
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
`}};
// Oue: kernelFunc registered as MV (backendName "webgpu"). Computes conv info for a
// depthwise convolution (dilations default to [1,1] when null/undefined) and picks
// one of three programs:
//   _x - input is not channelsLast, inHeight and inWidth > 16, strides and dilations
//        all 1, inChannels === outChannels;
//   Qc - channelsLast, outHeight and outWidth > 4, strideWidth <= 2, dilations 1,
//        inChannels === outChannels and inChannels % 4 === 0 (adds virtualWidth uniform);
//   Zc - every other case (adds filterHeight, filterWidth, strides, dilations uniforms).
// The pads and input-dims uniforms are always passed first. Output dtype is x.dtype.
function Oue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s}=t,{strides:a,pad:i,dataFormat:p,dilations:u,dimRoundingMode:c}=o,l=w.convertConv2DDataFormat(p),m=u;m==null&&(m=[1,1]);let d=w.computeConv2DInfo(n.shape,s.shape,a,m,i,c,!0,l),f=[{type:"int32",data:[d.padInfo.top,d.padInfo.left]},{type:"int32",data:[d.inHeight,d.inWidth]}],h=d.dataFormat==="channelsLast",g;return!h&&d.inHeight>16&&d.inWidth>16&&d.strideHeight===1&&d.strideWidth===1&&d.dilationWidth===1&&d.dilationHeight===1&&d.inChannels===d.outChannels?g=new _x(d.outShape,d.filterHeight,d.filterWidth):h&&d.outHeight>4&&d.outWidth>4&&d.strideWidth<=2&&d.inChannels===d.outChannels&&d.dilationHeight===1&&d.dilationWidth===1&&d.inChannels%4===0?(g=new Qc(d),f.push({type:"int32",data:[g.virtualWidth]})):(g=new Zc(d),f.push({type:"int32",data:[d.filterHeight]},{type:"int32",data:[d.filterWidth]},{type:"int32",data:[d.strideHeight,d.strideWidth]},{type:"int32",data:[d.dilationHeight,d.dilationWidth]})),e.runWebGPUProgram(g,[n,s],n.dtype,f)}
// Kernel registration for Oue.
var MV={kernelName:mn,backendName:"webgpu",kernelFunc:Oue};
// $x: program descriptor for shaderKey "depthwise_conv2d_backprop_filter"; inputs are
// "x" and "dy" and the output shape is the conv info's filterShape. The uniforms
// template literal opened at the end of this line continues below.
var $x=class{constructor(t){this.variableNames=["x","dy"],this.uniforms=`strides : vec2<i32>, pads : vec2<i32>, filterDims : vec2<i32>, outHeight : i32,
2023-08-05 15:03:11 +02:00
outWidth : i32, inHeight : i32, inWidth : i32, batchSize : i32, channelMul : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.filterShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="depthwise_conv2d_backprop_filter"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let wR = coords[0];
let wC = coords[1];
let d1 = coords[2];
let dm = coords[3];
let d2 = d1 * uniforms.channelMul + dm;
var dotProd = 0.0;
for (var b = 0; b < uniforms.batchSize; b++) {
for (var yR = 0; yR < uniforms.outHeight; yR++) {
let xR = wR + yR * uniforms.strides[0] - uniforms.pads[0];
if (xR < 0 || xR >= uniforms.inHeight) {
continue;
}
for (var yC = 0; yC < uniforms.outWidth; yC++) {
let xC = wC + yC * uniforms.strides[1] - uniforms.pads[1];
if (xC < 0 || xC >= uniforms.inWidth) {
continue;
}
let dyValue = getDy(b, yR, yC, d2);
let xValue = getX(b, xR, xC, d1);
dotProd += xValue * dyValue;
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}},
// Ex: program descriptor for shaderKey "depthwise_conv2d_backprop_input"; inputs are
// "dy" and "W" and the output shape is the conv info's inShape. Declared in the same
// `var` statement as $x. The uniforms template literal opened here continues below.
Ex=class{constructor(t){this.variableNames=["dy","W"],this.uniforms=`strides : vec2<i32>, pads : vec2<i32>, filterDims : vec2<i32>,
2023-08-05 15:03:11 +02:00
outHeight : i32, outWidth : i32, channelMul : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.inShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="depthwise_conv2d_backprop_input"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d1 = coords[3];
let dyCorner = coords.yz - uniforms.pads;
let dyRCorner = dyCorner.x;
let dyCCorner = dyCorner.y;
var dotProd = 0.0;
for (var wR = 0; wR < uniforms.filterDims[0]; wR++) {
let dyR = f32(dyRCorner + wR) / f32(uniforms.strides[0]);
if (dyR < 0.0 || dyR >= f32(uniforms.outHeight) || fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
let wRPerm = uniforms.filterDims[0] - 1 - wR;
for (var wC = 0; wC < uniforms.filterDims[1]; wC++) {
let dyC = f32(dyCCorner + wC) / f32(uniforms.strides[1]);
if (dyC < 0.0 || dyC >= f32(uniforms.outWidth) || fract(dyC) > 0.0) {
continue;
}
let idyC = i32(dyC);
let wCPerm = uniforms.filterDims[1] - 1 - wC;
for (var dm = 0; dm < uniforms.channelMul; dm++) {
let d2 = d1 * uniforms.channelMul + dm;
let xValue = getDy(batch, idyR, idyC, d2);
let wValue = getW(wRPerm, wCPerm, d1, dm);
dotProd += xValue * wValue;
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}};function Mue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,dy:s}=t,{strides:a,dilations:i,pad:p,dimRoundingMode:u,filterShape:c}=o,l=w.computeConv2DInfo(n.shape,c,a,i,p,u,!0),m=new $x(l),d=[{type:"int32",data:[l.strideHeight,l.strideWidth]},{type:"int32",data:[l.padInfo.top,l.padInfo.left]},{type:"int32",data:[l.filterHeight,l.filterWidth]},{type:"int32",data:[l.outHeight]},{type:"int32",data:[l.outWidth]},{type:"int32",data:[l.inHeight]},{type:"int32",data:[l.inWidth]},{type:"int32",data:[l.batchSize]},{type:"int32",data:[l.outChannels/l.inChannels]}];return e.runWebGPUProgram(m,[n,s],"float32",d)}var LV={kernelName:Fi,backendName:"webgpu",kernelFunc:Mue};function Lue(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,filter:s}=t,{strides:a,dilations:i,pad:p,dimRoundingMode:u,inputShape:c}=o,l=w.computeConv2DInfo(c,s.shape,a,i,p,u,!0),m=new Ex(l),d=[{type:"int32",data:[l.strideHeight,l.strideWidth]},{type:"int32",data:[l.filterHeight-1-l.padInfo.top,l.filterWidth-1-l.padInfo.left]},{type:"int32",data:[l.filterHeight,l.filterWidth]},{type:"int32",data:[l.outHeight]},{type:"int32",data:[l.outWidth]},{type:"int32",data:[l.outChannels/l.inChannels]}];return e.runWebGPUProgram(m,[n,s],n.dtype,d)}var BV={kernelName:Pi,backendName:"webgpu",kernelFunc:Lue};var Rx=class{constructor(t){this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[t,t],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="diag"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-01-06 19:23:06 +01:00
if (index < uniforms.size) {
let coords = getOutputCoords();
let value = select(0.0, getX(coords[0]), coords[0] == coords[1]);
setOutputAtIndex(index, value);
}
}
`}};function Bue(r){let{inputs:t,backend:e}=r,{x:o}=t,n=[...o.shape,...o.shape],s=y.sizeFromShape(o.shape),a=pe({inputs:{x:o},backend:e,attrs:{shape:[s]}}),i=new Rx(s),p=e.runWebGPUProgram(i,[a],a.dtype),u=pe({inputs:{x:p},backend:e,attrs:{shape:n}});return e.disposeData(a.dataId),e.disposeData(p.dataId),u}var zV={kernelName:oa,backendName:"webgpu",kernelFunc:Bue};var Dx=class{constructor(t){this.variableNames=["x","w"],this.uniforms="filterDims: vec2<i32>, pads: vec2<i32>, strides: vec2<i32>, dilations: vec2<i32>",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.outShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="dilation2d"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-01-06 19:23:06 +01:00
if (index < uniforms.size) {
let neg_infinity = -3.4e38;
let coords = getOutputCoords();
let batch = coords.x;
let d1 = coords.w;
2023-05-08 15:12:41 +02:00
let outTopLeftCorner = coords.yz * uniforms.strides - uniforms.pads;
2023-01-06 19:23:06 +01:00
let hBeg = outTopLeftCorner.x;
let wBeg = outTopLeftCorner.y;
var curVal = neg_infinity;
for (var h = 0; h < uniforms.filterDims[0]; h = h + 1) {
2023-05-08 15:12:41 +02:00
let hIn = hBeg + h * uniforms.dilations[0];
2023-01-06 19:23:06 +01:00
if (hIn >= 0 && hIn < uniforms.xShape[1]) {
for (var w = 0; w < uniforms.filterDims[1]; w = w + 1) {
2023-05-08 15:12:41 +02:00
let wIn = wBeg + w * uniforms.dilations[1];
2023-01-06 19:23:06 +01:00
if (wIn >= 0 && wIn < uniforms.xShape[2]) {
let val = getX(batch, hIn, wIn, d1) + getW(h, w, d1);
if (val > curVal) {
curVal = val;
}
}
}
}
}
setOutputAtIndex(index, curVal);
}
}
`}};
// zue: kernelFunc registered as VV. Computes dilation info with the "NHWC" format
// argument and runs the Dx program over [x, filter]; output dtype is x.dtype.
// Uniform order matches Dx.uniforms: filterDims, pads (top, left), strides, dilations.
function zue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s}=t,{strides:a,pad:i,dilations:p}=o,u=w.computeDilation2DInfo(n.shape,s.shape,a,i,"NHWC",p),c=[u.padInfo.top,u.padInfo.left],l=[{type:"int32",data:[u.filterHeight,u.filterWidth]},{type:"int32",data:[...c]},{type:"int32",data:[u.strideHeight,u.strideWidth]},{type:"int32",data:[u.dilationHeight,u.dilationWidth]}],m=new Dx(u);return e.runWebGPUProgram(m,[n,s],n.dtype,l)}
// Kernel registration for zue.
var VV={kernelName:dn,backendName:"webgpu",kernelFunc:zue};
// Ax: program descriptor for shaderKey "dilation2DBackpropInput" (atomic output).
// The dispatch is sized from the forward outShape (one invocation per dy element)
// while outputShape is the input shape. The constructor throws for any dtype e other
// than "float32" or "int32"; the error-message template opened here continues below.
var Ax=class{constructor(t,e){if(this.variableNames=["x","w","dy"],this.uniforms="filterDims: vec2<i32>, pads: vec2<i32>, strides: vec2<i32>, dilations: vec2<i32>, dySize: i32,",this.workgroupSize=[64,1,1],this.atomic=!0,this.outputShape=t.inShape,this.dispatchLayout=X(t.outShape),this.dispatch=H(this.dispatchLayout,t.outShape,this.workgroupSize),e!=="float32"&&e!=="int32")throw new Error(`Dilation2DBackpropInput only supports float32 and int32
2023-08-05 15:03:11 +02:00
types, does not support ${e} type.`);this.type=e,this.shaderKey="dilation2DBackpropInput"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.dySize) {
let coords = getDyCoordsFromIndex(index);
let b = coords[0];
let r = coords[1];
let c = coords[2];
let d = coords[3];
let dyCorner = vec2<i32>(r, c) * uniforms.strides - uniforms.pads;
var curVal = -3.4e38; // neg_infinity
var xRMax = 0;
var xCMax = 0;
// In the case of multiple argmax branches, we only back-propagate
// along the last branch, i.e., the one with largest value of
// 'wR * uniforms.filterDims[1] + wC', similarly to the max-pooling
// backward routines.
for (var wR = 0; wR < uniforms.filterDims[0]; wR++) {
let xR = dyCorner.x + wR * uniforms.dilations[0];
if (xR >= 0 && xR < uniforms.xShape[1]) {
for (var wC = 0; wC < uniforms.filterDims[1]; wC++) {
let xC = dyCorner.y + wC * uniforms.dilations[1];
if (xC >= 0 && xC < uniforms.xShape[2]) {
let val = getX(b, xR, xC, d) + getW(wR, wC, d);
if (val > curVal) {
curVal = val;
xRMax = xR;
xCMax = xC;
}
}
}
}
}
let flatIndexIn = d + uniforms.xShape[3] *
(xCMax + uniforms.xShape[2] * (xRMax + uniforms.xShape[1] * b));
let value = getDy(b, r, c, d);
${Yr("&result[flatIndexIn]","value",this.type)}
2023-05-08 15:12:41 +02:00
}
}
`}},
// Fx: program descriptor for shaderKey "dilation2DBackpropFilter" (atomic output),
// declared in the same `var` statement as Ax. The dispatch is sized from the forward
// outShape while outputShape is the filter shape. The second constructor argument (e)
// is not read; the third (o) is the dtype and must be "float32" or "int32", otherwise
// the constructor throws. The error-message template opened here continues below.
Fx=class{constructor(t,e,o){if(this.variableNames=["x","w","dy"],this.uniforms="filterDims: vec2<i32>, pads: vec2<i32>, strides: vec2<i32>, dilations: vec2<i32>, dySize: i32,",this.workgroupSize=[64,1,1],this.atomic=!0,this.outputShape=t.filterShape,this.dispatchLayout=X(t.outShape),this.dispatch=H(this.dispatchLayout,t.outShape,this.workgroupSize),o!=="float32"&&o!=="int32")throw new Error(`Dilation2DBackpropFilter only supports float32 and int32
2023-05-08 15:12:41 +02:00
types, does not support ${o} type.`);this.type=o,this.shaderKey="dilation2DBackpropFilter"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.dySize) {
let coords = getDyCoordsFromIndex(index);
let b = coords[0];
let r = coords[1];
let c = coords[2];
let d = coords[3];
let dyCorner = vec2<i32>(r, c) * uniforms.strides - uniforms.pads;
var curVal = -3.4e38; // neg_infinity
var wRMax = 0;
var wCMax = 0;
// In the case of multiple argmax branches, we only back-propagate
// along the last branch, i.e., the one with largest value of
// 'wR * uniforms.filterDims[1] + wC', similarly to the max-pooling
// backward routines.
for (var wR = 0; wR < uniforms.filterDims[0]; wR++) {
let xR = dyCorner.x + wR * uniforms.dilations[0];
if (xR >= 0 && xR < uniforms.xShape[1]) {
for (var wC = 0; wC < uniforms.filterDims[1]; wC++) {
let xC = dyCorner.y + wC * uniforms.dilations[1];
if (xC >= 0 && xC < uniforms.xShape[2]) {
let val = getX(b, xR, xC, d) + getW(wR, wC, d);
if (val > curVal) {
curVal = val;
wRMax = wR;
wCMax = wC;
}
}
}
}
}
let flatIndexIn = d + uniforms.wShape[2] * (wCMax + wRMax * uniforms.wShape[1]);
let value = getDy(b, r, c, d);
${Yr("&result[flatIndexIn]","value",this.type)}
2023-08-05 15:03:11 +02:00
}
}
`}};
// Vue: kernelFunc registered as WV (dilation2d gradient w.r.t. the filter). Builds a
// zero-filled tensor with the filter's shape and dtype via vt and passes it as the
// fifth argument of runWebGPUProgram, alongside the Fx program. The last uniform
// (dySize) is the element count of the forward output shape.
function Vue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s,dy:a}=t,{strides:i,pad:p,dilations:u}=o,c=w.computeDilation2DInfo(n.shape,s.shape,i,p,"NHWC",u),l=s.dtype,m=new Fx(c,s.shape,l),d=[{type:"int32",data:[c.filterHeight,c.filterWidth]},{type:"int32",data:[c.padInfo.top,c.padInfo.left]},{type:"int32",data:[c.strideHeight,c.strideWidth]},{type:"int32",data:[c.dilationHeight,c.dilationWidth]},{type:"int32",data:[y.sizeFromShape(c.outShape)]}],f=vt({backend:e,attrs:{shape:s.shape,value:0,dtype:l}});return e.runWebGPUProgram(m,[n,s,a],l,d,f)}
// Kernel registration for Vue.
var WV={kernelName:Mi,backendName:"webgpu",kernelFunc:Vue};
// Wue: kernelFunc registered as UV (dilation2d gradient w.r.t. the input). Same shape
// as Vue, but uses the Ax program, x's dtype, and a zero-filled tensor of the input
// shape as the fifth runWebGPUProgram argument.
function Wue(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s,dy:a}=t,{strides:i,pad:p,dilations:u}=o,c=w.computeDilation2DInfo(n.shape,s.shape,i,p,"NHWC",u),l=n.dtype,m=new Ax(c,l),d=[{type:"int32",data:[c.filterHeight,c.filterWidth]},{type:"int32",data:[c.padInfo.top,c.padInfo.left]},{type:"int32",data:[c.strideHeight,c.strideWidth]},{type:"int32",data:[c.dilationHeight,c.dilationWidth]},{type:"int32",data:[y.sizeFromShape(c.outShape)]}],f=vt({backend:e,attrs:{shape:c.inShape,value:0,dtype:l}});return e.runWebGPUProgram(m,[n,s,a],l,d,f)}
// Kernel registration for Wue.
var UV={kernelName:Oi,backendName:"webgpu",kernelFunc:Wue};
// Px: program descriptor for the draw shader (shaderKey "draw_<dtype>_<textureFormat>").
// t - output shape, e - image dtype, o - storage texture format. getUserCode() scales
// non-float32 values by 1/255 and first builds the per-channel assignment snippet in
// the template literal opened at the end of this line.
var Px=class{constructor(t,e,o){this.variableNames=["Image"],this.uniforms="alpha: f32,",this.workgroupSize=[64,1,1],this.pixelsOpType=Ci.DRAW,this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.type=e,this.textureFormat=o,this.shaderKey=`draw_${e}_${o}`}getUserCode(){let t,e=this.type==="float32"?"value":"value / 255.0";return t=`
2023-08-05 15:03:11 +02:00
if (uniforms.numChannels == 1) {
rgba[0] = ${e};
rgba[1] = ${e};
rgba[2] = ${e};
} else {
rgba[d] = ${e};
}`,`
@group(0) @binding(0) var outImage : texture_storage_2d<${this.textureFormat}, write>;
${G("index")} {
if (index < uniforms.size) {
var rgba = vec4<f32>(0.0, 0.0, 0.0, uniforms.alpha);
for (var d = 0; d < uniforms.numChannels; d = d + 1) {
let value = f32(inBuf[index * uniforms.numChannels + d]);
${t}
}
rgba.x = rgba.x * rgba.w;
rgba.y = rgba.y * rgba.w;
rgba.z = rgba.z * rgba.w;
let coords = getCoordsFromIndex(index);
textureStore(outImage, vec2<i32>(coords.yx), rgba);
2023-05-08 15:12:41 +02:00
}
}
`}};
// Uue: kernelFunc registered as GV. Draws the image tensor onto attrs.canvas through
// a storage texture. The canvas is resized to the image's width/height; if it yields
// no "webgpu" context, an OffscreenCanvas is used instead and its content is copied
// back with the canvas's "2d" context (throws if that context is unavailable too).
// The texture format is "bgra8unorm" when the device reports the
// "bgra8unorm-storage" feature, otherwise "rgba8unorm". Returns the input image.
// NOTE(review): alpha is read as `(imageOptions.alpha) || 1`, so an explicit alpha of
// 0 is replaced by 1 - confirm that this is intended.
function Uue(r){let{inputs:t,backend:e,attrs:o}=r,{image:n}=t,{canvas:s,options:a}=o,[i,p]=n.shape.slice(0,2),{imageOptions:u}=a||{},c=(u==null?void 0:u.alpha)||1,l=e.device.features.has("bgra8unorm-storage")?"bgra8unorm":"rgba8unorm",m=[i,p],d=new Px(m,n.dtype,l);s.width=p,s.height=i;let f="webgpu",h=s.getContext(f),g;h||(g=new OffscreenCanvas(p,i),h=g.getContext(f));let x=n.shape.length===3?n.shape[2]:1;h.configure({device:e.device,format:l,usage:GPUTextureUsage.STORAGE_BINDING,alphaMode:"premultiplied"});let b="int32",C=e.makeTensorInfo(m,b),S=e.tensorMap.get(C.dataId);S.resource=h.getCurrentTexture(),S.external=!0;let k=[{type:"uint32",data:[x]},{type:"float32",data:[c]}];if(e.runWebGPUProgram(d,[n],b,k,C),g){let _=s.getContext("2d");if(!_)throw new Error("Please make sure this canvas has only been used for 2d or webgpu context!");_.drawImage(g,0,0)}return e.disposeData(C.dataId),n}
// Kernel registration for Uue.
var GV={kernelName:_u,backendName:"webgpu",kernelFunc:Uue};
// a0: binary kernel built by et for fe.MUL (with a CPU implementation and complex
// support flag); registered as HV. Also called directly by Gue below.
var a0=et({opType:fe.MUL,cpuKernelImpl:kz,supportsComplex:!0}),HV={kernelName:Xn,backendName:"webgpu",kernelFunc:a0};
// i0: kernelFunc registered as KV; delegates to Jr with the "sum" reduction.
function i0(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o;return Jr(n,s,a,"sum",e)}var KV={kernelName:Ss,backendName:"webgpu",kernelFunc:i0};
// Gue: kernelFunc registered as qV (einsum). For every step of the compute path, each
// operand is transposed (xr) unless its permutation is the identity, passed through pe
// when expand-dims change its shape, and multiplied into the running product with a0.
// After every step except the last, the product is summed over path[step] when that
// entry is >= 0. Every intermediate collected in f, except the returned value, is
// disposed at the end.
function Gue(r){let{inputs:t,backend:e,attrs:o}=r,{equation:n}=o,s=t,{allDims:a,summedDims:i,idDims:p}=w.decodeEinsumEquation(n,s.length);w.checkEinsumDimSizes(a.length,p,s);let{path:u,steps:c}=w.getEinsumComputePath(i,p),l=c.length,m=null,d=a.length,f=[];for(let h=0;h<l;++h){for(let g of c[h]){let{permutationIndices:x,expandDims:b}=w.getEinsumPermutation(d,p[g]),C;w.isIdentityPermutation(x)?C=s[g]:(C=xr({inputs:{x:s[g]},backend:e,attrs:{perm:x}}),f.push(C));let S=C.shape.slice();for(let k=0;k<b.length;++k)S.splice(b[k],0,1);y.arraysEqual(C.shape,S)||(C=pe({inputs:{x:C},backend:e,attrs:{shape:S}}),f.push(C)),m===null?m=C:(m=a0({inputs:{a:C,b:m},backend:e}),f.push(m))}h<l-1&&(u[h]>=0&&(m=i0({inputs:{x:m},backend:e,attrs:{axis:u[h]-(a.length-d),keepDims:!1}}),f.push(m)),d--)}for(let h of f)h!==m&&e.disposeData(h.dataId);return m}
// Kernel registration for Gue.
var qV={kernelName:Li,backendName:"webgpu",kernelFunc:Gue};
// Hue / jV: unary kernel built by ye for Z.ELU.
var Hue=ye({opType:Z.ELU}),jV={kernelName:hn,backendName:"webgpu",kernelFunc:Hue};
// Kue / XV: runs the Si binary program with fe.ELU_DER over [dy, y]; output dtype is dy.dtype.
var Kue=r=>{let{inputs:t,backend:e}=r,{dy:o,y:n}=t,s=new Si(fe.ELU_DER,o.shape,n.shape);return e.runWebGPUProgram(s,[o,n],o.dtype)},XV={kernelName:Xa,backendName:"webgpu",kernelFunc:Kue};
// que / YV: binary kernel for fe.EQUAL with "bool" output dtype.
var que=et({opType:fe.EQUAL,dtype:"bool",cpuKernelImpl:cz}),YV={kernelName:xn,backendName:"webgpu",kernelFunc:que};
// jue / QV: unary kernel for Z.ERF.
var jue=ye({opType:Z.ERF}),QV={kernelName:gn,backendName:"webgpu",kernelFunc:jue};
// Xue / ZV: unary kernel for Z.EXP with "float32" output dtype.
var Xue=ye({opType:Z.EXP,cpuKernelImpl:lz,dtype:"float32"}),ZV={kernelName:yn,backendName:"webgpu",kernelFunc:Xue};
// Ox: kernelFunc registered as JV. Inserts a size-1 dimension at attrs.dim and passes
// the input through pe with the new shape. A negative dim is asserted to be
// >= -(rank + 1) and converted to rank + dim + 1; non-negative dims are used as-is.
function Ox(r){let{inputs:t,attrs:e,backend:o}=r,{dim:n}=e,{input:s}=t,a=s.shape.length,i=s.shape.slice(),p=n;return n<0&&(y.assert(-(a+1)<=n,()=>`Axis must be in the interval [${-(a+1)}, ${a}]`),p=a+n+1),i.splice(p,0,1),pe({inputs:{x:s},backend:o,attrs:{shape:i}})}var JV={kernelName:na,backendName:"webgpu",kernelFunc:Ox};
// Yue / eW: unary kernel for Z.EXPM1.
var Yue=ye({opType:Z.EXPM1,cpuKernelImpl:mz}),eW={kernelName:bn,backendName:"webgpu",kernelFunc:Yue};
// gm: program descriptor for the DFT shader (shaderKey "fft_<component>").
// t - "real" or "imag", selecting which part of the complex product the shader
// returns; e - output shape. Inputs are the "real" and "imag" planes.
var gm=class{constructor(t,e){this.variableNames=["real","imag"],this.outputShape=[],this.uniforms="exponentMultiplier : f32, denominator: f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.component=t,this.shaderKey=`fft_${t}`}getUserCode(){return`
2022-11-20 22:20:02 +01:00
fn unaryOpComplex(real: f32, expR: f32, imag: f32, expI: f32) -> f32 {
${this.component==="real"?"return real * expR - imag * expI;":"return real * expI + imag * expR;"}
}
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
fn mulMatDFT(batch: i32, index: i32) -> f32 {
let indexRatio = f32(index) / f32(uniforms.realShape[1]);
let exponentMultiplierTimesIndexRatio =
uniforms.exponentMultiplier * indexRatio;
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
var result = 0.0;
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
for (var i = 0; i < uniforms.realShape[1]; i = i + 1) {
// x = (-2|2 * PI / N) * index * i;
let x = exponentMultiplierTimesIndexRatio * f32(i);
let expR = cos(x);
let expI = sin(x);
let real = getReal(batch, i);
let imag = getImag(batch, i);
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
result = result +
unaryOpComplex(real, expR, imag, expI) / uniforms.denominator;
}
return result;
}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-20 22:20:02 +01:00
if (index < uniforms.size) {
let coords = getOutputCoords();
setOutputAtIndex(index, mulMatDFT(coords[0], coords[1]));
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
}
`}};function Mx(r,t,e){let o=e.tensorMap.get(r.dataId),n=y.sizeFromShape(r.shape),s=r.shape[r.shape.length-1],a=n/s,i=[],p=pe({inputs:{x:r},backend:e,attrs:{shape:[a,s]}});i.push(p);let u=p.shape,c=new gm("real",u),l=new gm("imag",u),m=[{dataId:o.complexTensorInfos.real.dataId,dtype:o.complexTensorInfos.real.dtype,shape:u},{dataId:o.complexTensorInfos.imag.dataId,dtype:o.complexTensorInfos.imag.dtype,shape:u}],d=t?2*Math.PI:-2*Math.PI,f=t?u[1]:1,h=[{type:"float32",data:[d]},{type:"float32",data:[f]}],g=e.runWebGPUProgram(c,m,"float32",h);i.push(g);let x=e.runWebGPUProgram(l,m,"float32",h);i.push(x);let b=xo({inputs:{real:g,imag:x},backend:e});i.push(b);let C=pe({inputs:{x:b},backend:e,attrs:{shape:r.shape}});return i.forEach(S=>e.disposeData(S.dataId)),C}function Que(r){let{inputs:t,backend:e}=r,{input:o}=t;return Mx(o,!1,e)}var tW={kernelName:Bi,backendName:"webgpu",kernelFunc:Que};var Lx=class{constructor(t){this.outputShape=[],this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="flipLeftRight"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let coordX = uniforms.xShape[2] - coords[2] - 1;
let outputValue = getX(coords[0], coords[1], coordX, coords[3]);
setOutputAtIndex(index, outputValue);
}
}
`}};
// rW: kernel registration whose kernelFunc runs the Lx (flipLeftRight) program on
// inputs.image and returns a tensor of the same dtype.
var rW={kernelName:Cn,backendName:"webgpu",kernelFunc:({inputs:r,backend:t})=>{let{image:e}=r,o=t,n=new Lx(e.shape);return o.runWebGPUProgram(n,[e],e.dtype)}};
// Zue / oW: unary kernel built by ye for Z.FLOOR.
var Zue=ye({opType:Z.FLOOR,cpuKernelImpl:dz}),oW={kernelName:wn,backendName:"webgpu",kernelFunc:Zue};
// Jue / nW: binary kernel built by et for fe.FLOOR_DIV with "int32" output dtype.
var Jue=et({opType:fe.FLOOR_DIV,cpuKernelImpl:fz,dtype:"int32"}),nW={kernelName:Sn,backendName:"webgpu",kernelFunc:Jue};
// Bx: program descriptor for the fromPixels shader (shaderKey "fromPixels_<importVideo>").
//   t - output shape, e - number of channels (used as the per-invocation work size in
//   the dispatch computation), o - true when the source is bound as texture_external.
// getUserCode() selects the textureLoad form accordingly (external textures take no
// mip-level argument).
var Bx=class{constructor(t,e,o=!1){this.pixelsOpType=Ci.FROM_PIXELS,this.outputShape=[0],this.variableNames=[],this.workgroupSize=[256,1,1],this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[e,1,1]),this.importVideo=o,this.shaderKey=`fromPixels_${this.importVideo}`}getUserCode(){let t=this.importVideo?"textureLoad(src, vec2<i32>(coords.yx));":"textureLoad(src, vec2<i32>(coords.yx), 0)";return`
2022-11-18 17:13:29 +01:00
@binding(1) @group(0) var src: ${this.importVideo?"texture_external":"texture_2d<f32>"};
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
let flatIndex = index * uniforms.numChannels;
if (flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
2023-08-05 15:03:11 +02:00
let values = ${t};
2022-11-18 17:13:29 +01:00
for (var i = 0; i < uniforms.numChannels; i = i + 1) {
result[flatIndex + i] = i32(floor(255.0 * values[i]));
}
}
}
`}};
// sW: kernel registration for epe (fromPixels).
// Jc: lazily created 2D canvas context reused across calls to stage video/image frames.
// u0: value of the CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU flag that Jc was created with.
var sW={kernelName:Eu,backendName:"webgpu",kernelFunc:epe},Jc,u0=A().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");
// epe: converts inputs.pixels into an int32 tensor of shape [height, width, numChannels].
// Throws if pixels is null/undefined.
//  - ImageBitmap, canvas, video or image sources go through the GPU: a video is imported
//    as an external texture when WEBGPU_IMPORT_EXTERNAL_TEXTURE is set; otherwise
//    video/image sources are first drawn into the cached 2D canvas (recreated when the
//    will-read-frequently flag changes), the source is copied into an "rgba8unorm"
//    texture, and the Bx program reads it back into the result tensor.
//  - Anything else is treated as an object with data/width/height: when numChannels is
//    not 4, only the first numChannels of every 4 values are kept; the result is
//    uploaded to the GPU as Int32Array data.
function epe(r){let{inputs:t,backend:e,attrs:o}=r,{pixels:n}=t,{numChannels:s}=o;if(n==null)throw new Error("pixels passed to tf.browser.fromPixels() can not be null");let a=typeof HTMLVideoElement!="undefined"&&n instanceof HTMLVideoElement,i=typeof HTMLImageElement!="undefined"&&n instanceof HTMLImageElement,p=typeof HTMLCanvasElement!="undefined"&&n instanceof HTMLCanvasElement||typeof OffscreenCanvas!="undefined"&&n instanceof OffscreenCanvas,u=typeof ImageBitmap!="undefined"&&n instanceof ImageBitmap,[c,l]=a?[n.videoWidth,n.videoHeight]:[n.width,n.height],m=[l,c,s],d=A().getBool("WEBGPU_IMPORT_EXTERNAL_TEXTURE")&&a,f=a||i;if(u||p||f){let b;if(d)b=e.device.importExternalTexture({source:n});else{if(f){let L=A().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");(Jc==null||L!==u0)&&(u0=L,Jc=document.createElement("canvas").getContext("2d",{willReadFrequently:u0})),Jc.canvas.width=c,Jc.canvas.height=l,Jc.drawImage(n,0,0,c,l),n=Jc.canvas}let P=GPUTextureUsage.COPY_DST|GPUTextureUsage.RENDER_ATTACHMENT|GPUTextureUsage.TEXTURE_BINDING,O="rgba8unorm",M=e.textureManager.acquireTexture(m[1],m[0],O,P);e.queue.copyExternalImageToTexture({source:n},{texture:M},[m[1],m[0]]),b=M}let C=y.sizeFromShape(m),S=y.computeStrides(m),k=new Bx(m,s,d),_=[{type:"uint32",data:[C]},{type:"uint32",data:[s]},{type:"uint32",data:[...S]}],E=e.makeTensorInfo([l,c],"int32"),R=e.tensorMap.get(E.dataId);R.resource=b;let D=e.runWebGPUProgram(k,[E],"int32",_);return e.disposeData(E.dataId),D}let h=n.data,g=h;if(s!=null&&s!==4){g=new Uint8Array(n.width*n.height*s);let b=h.length,C=0;for(let S=0;S<b;S++)S%4<s&&(g[C++]=h[S])}let x=e.makeTensorInfo(m,"int32",new Int32Array(g));return e.uploadToGPU(x.dataId),x}
// zx: program descriptor for shaderKey "batchNorm".
//   t - x shape, e - mean shape, o - variance shape, n - offset shape or null,
//   s - scale shape or null. Each non-null shape is checked for broadcast
//   compatibility with t; "offset" and "scale" are appended to variableNames (in that
//   order) only when present. getUserCode() substitutes the constants 0.0 / 1.0 for a
//   missing offset / scale.
// The uniforms string is kept on a single line: a line break inside the double-quoted
// literal is a syntax error.
var zx=class{constructor(t,e,o,n,s){this.uniforms="varianceEpsilon : f32,",this.workgroupSize=[128,1,1],this.size=!0,this.variableNames=["x","mean","variance"],w.assertAndGetBroadcastShape(t,e),w.assertAndGetBroadcastShape(t,o),this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),n!=null&&(w.assertAndGetBroadcastShape(t,n),this.variableNames.push("offset")),s!=null&&(w.assertAndGetBroadcastShape(t,s),this.variableNames.push("scale")),this.offsetShape=n,this.scaleShape=s,this.shaderKey="batchNorm"}getUserCode(){let t="0.0";this.offsetShape!=null&&(t="getOffsetByOutputIndex(index)");let e="1.0";return this.scaleShape!=null&&(e="getScaleByOutputIndex(index)"),`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size)
{
let xValue = getXByOutputIndex(index);
let meanValue = getMeanByOutputIndex(index);
let varianValue = getVarianceByOutputIndex(index);
2023-08-05 15:03:11 +02:00
let offsetValue = ${t};
let scaleValue = ${e};
2022-11-18 17:13:29 +01:00
let inv = scaleValue * inverseSqrt(varianValue + f32(uniforms.varianceEpsilon));
setOutputAtIndex(index,dot(vec3<f32>(xValue, -meanValue, offsetValue), vec3<f32>(inv, inv, 1.0)));
}
}
`}};var aW={kernelName:In,backendName:"webgpu",kernelFunc:({inputs:r,attrs:t,backend:e})=>{let{x:o,scale:n,offset:s,mean:a,variance:i}=r,{varianceEpsilon:p}=t,u=e,c=[o,a,i],l=null;s!=null&&(l=s.shape,c.push(s));let m=null;n!=null&&(m=n.shape,c.push(n));let d=new zx(o.shape,a.shape,i.shape,l,m),f=[{type:"float32",data:[p]}];return u.runWebGPUProgram(d,c,o.dtype,f)}};function tpe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=t,{strides:p,pad:u,dataFormat:c,dilations:l,dimRoundingMode:m,activation:d,leakyreluAlpha:f}=o,h=w.convertConv2DDataFormat(c),g=w.computeConv2DInfo(n.shape,s.shape,p,l,u,m,!1,h);return yx({x:n,filter:s,convInfo:g,backend:e,bias:a,preluActivationWeights:i,leakyreluAlpha:f,activation:d})}var iW={kernelName:Io,backendName:"webgpu",kernelFunc:tpe};function rpe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=t,{strides:p,pad:u,dilations:c,dimRoundingMode:l,activation:m,leakyreluAlpha:d}=o,f=c;f==null&&(f=[1,1]),y.assert(w.eitherStridesOrDilationsAreOne(p,f),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. 
Got strides ${p} and dilations '${f}'`);let h=w.computeConv2DInfo(n.shape,s.shape,p,f,u,l,!0),g=[n,s],x=a!=null,b=i!=null;x&&g.push(a),b&&g.push(i);let C=[{type:"int32",data:[h.padInfo.top,h.padInfo.left]},{type:"int32",data:[h.inHeight,h.inWidth]}],S;return h.outHeight>4&&h.outWidth>4&&h.strideWidth<=2&&h.inChannels===h.outChannels&&h.dilationHeight===1&&h.dilationWidth===1&&h.inChannels%4===0?(S=new Qc(h,x,m,b),C.push({type:"int32",data:[S.virtualWidth]})):(S=new Zc(h,x,m,b),C.push({type:"int32",data:[h.filterHeight]},{type:"int32",data:[h.filterWidth]},{type:"int32",data:[h.strideHeight,h.strideWidth]},{type:"int32",data:[h.dilationHeight,h.dilationWidth]})),m==="leakyrelu"&&(C.push({type:"float32",data:[d]}),S.uniforms+=" alpha : f32,"),e.runWebGPUProgram(S,g,"float32",C)}var uW={kernelName:vo,backendName:"webgpu",kernelFunc:rpe};var Vx=class{constructor(t,e){this.variableNames=["A","indices"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey=`gathernd_${t}`,this.sliceDim=t,this.uniforms=`sliceDim : i32, strides : ${ft(t)},`}getUserCode(){let t;return this.sliceDim>1?t="uniforms.strides[j]":t="uniforms.strides",`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
var flattenIndex = 0;
for (var j = 0; j < uniforms.sliceDim; j = j + 1) {
let indexTemp = i32(round(getIndices(coords[0], j)));
2023-08-05 15:03:11 +02:00
let strideNum = ${t};
2022-11-18 17:13:29 +01:00
flattenIndex = flattenIndex + indexTemp * strideNum;
}
setOutputAtIndex(index, getA(flattenIndex, coords[1]));
}
}
`}};function ope(r){let{inputs:t,backend:e}=r,{params:o,indices:n}=t,s=n.shape,a=s[s.length-1],i=y.sizeFromShape(o.shape),[p,u,c,l]=w.prepareAndValidate(o,n),m=pe({inputs:{x:n},backend:e,attrs:{shape:[u,a]}}),d=pe({inputs:{x:o},backend:e,attrs:{shape:[y.sizeFromShape(o.shape)/c,c]}});if(e.shouldExecuteOnCPU([o,n])||o.dtype==="string"){let b=e.readSync(n.dataId),C=e.bufferSync(o),S=hz(b,C,o.dtype,u,a,c,l,o.shape,i);return e.makeTensorInfo(p,o.dtype,S.values)}let f=new Vx(a,[u,c]),h=[{type:"int32",data:[a]},{type:"int32",data:l}],g=e.runWebGPUProgram(f,[d,m],d.dtype,h),x=pe({inputs:{x:g},backend:e,attrs:{shape:p}});return e.disposeData(m.dataId),e.disposeData(d.dataId),e.disposeData(g.dataId),x}var pW={kernelName:vn,backendName:"webgpu",kernelFunc:ope};var Wx=class{constructor(t,e){this.variableNames=["A","indices"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.slice(),this.aShape=t,this.outputShape=e,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="gather"}getUserCode(){let t=npe(this.aShape);return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let resRC = getCoordsFromIndex(index);
let indexZ = i32(getIndices(resRC.x, resRC.z));
let inBounds = select(0.0, 1.0, indexZ >= 0 && indexZ < uniforms.aShape[2]);
2023-08-05 15:03:11 +02:00
setOutputAtIndex(index, inBounds * getA(${t}));
2022-11-18 17:13:29 +01:00
}
}
`}};function npe(r){let t=["resRC.x","resRC.y","resRC.z","resRC.w"],e=[];for(let o=0;o<r.length;o++)o===2?e.push("indexZ"):e.push(`${t[o]}`);return e.join()}function p0(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,indices:s}=t,{axis:a,batchDims:i}=o,p=y.parseAxisParam(a,n.shape)[0],u=w.segment_util.collectGatherOpShapeInfo(n,s,p,i),c=y.sizeFromShape(s.shape),l=[],m=pe({inputs:{x:n},backend:e,attrs:{shape:[u.batchSize,u.outerSize,u.dimSize,u.sliceSize]}}),d=pe({inputs:{x:s},backend:e,attrs:{shape:[u.batchSize,c/u.batchSize]}});l.push(m),l.push(d);let f=[u.batchSize,u.outerSize,c/u.batchSize,u.sliceSize];if(e.shouldExecuteOnCPU([n,s])){let C=e.tensorMap.get(d.dataId).values,S=me(d.shape,d.dtype,C),_=e.tensorMap.get(m.dataId).values,E=me(m.shape,m.dtype,_),R=gz(E,S,f);return l.forEach(D=>e.disposeData(D.dataId)),e.makeTensorInfo(u.outputShape,R.dtype,R.values)}let h=new Wx(m.shape,f),g=e.runWebGPUProgram(h,[m,d],m.dtype);l.push(g);let x=pe({inputs:{x:g},backend:e,attrs:{shape:u.outputShape}});return l.forEach(b=>e.disposeData(b.dataId)),x}var cW={kernelName:aa,backendName:"webgpu",kernelFunc:p0};var spe=et({opType:fe.GREATER,cpuKernelImpl:yz,dtype:"bool"}),lW={kernelName:kn,backendName:"webgpu",kernelFunc:spe};var ape=et({opType:fe.GREATER_EQUAL,dtype:"bool",cpuKernelImpl:xz}),mW={kernelName:Nn,backendName:"webgpu",kernelFunc:ape};function ipe(r){let{inputs:t,backend:e}=r,{input:o}=t;return Mx(o,!0,e)}var dW={kernelName:zi,backendName:"webgpu",kernelFunc:ipe};var upe=ye({opType:Z.IS_FINITE,dtype:"bool"}),fW={kernelName:Tn,backendName:"webgpu",kernelFunc:upe};var ppe=ye({opType:Z.IS_INF,dtype:"bool"}),hW={kernelName:_n,backendName:"webgpu",kernelFunc:ppe};var cpe=ye({opType:Z.IS_NAN,dtype:"bool"}),gW={kernelName:$n,backendName:"webgpu",kernelFunc:cpe};function lpe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{alpha:s}=o,a=[{type:"float32",data:[s]}],i=new Zr(n.shape,Z.LEAKYRELU,"alpha : f32,");return e.runWebGPUProgram(i,[n],"float32",a)}var 
xW={kernelName:En,backendName:"webgpu",kernelFunc:lpe};var mpe=et({opType:fe.LESS,dtype:"bool",cpuKernelImpl:Cz}),yW={kernelName:Rn,backendName:"webgpu",kernelFunc:mpe};var dpe=et({opType:fe.LESS_EQUAL,dtype:"bool",cpuKernelImpl:bz}),bW={kernelName:Dn,backendName:"webgpu",kernelFunc:dpe};var Ux=class{constructor(t){this.variableNames=[],this.outputShape=[],this.uniforms="start : f32, step : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[t],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="linSpace"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-01-06 19:23:06 +01:00
if (index < uniforms.size) {
setOutputAtIndex(index, uniforms.start + f32(index) * uniforms.step);
}
}
`}};function fpe(r){let{backend:t,attrs:e}=r,{start:o,stop:n,num:s}=e,a=(n-o)/(s-1),i=new Ux(s),p=[{type:"float32",data:[o]},{type:"float32",data:[a]}];return t.runWebGPUProgram(i,[],"float32",p)}var CW={kernelName:An,backendName:"webgpu",kernelFunc:fpe};var hpe=ye({opType:Z.LOG,cpuKernelImpl:wz}),wW={kernelName:Fn,backendName:"webgpu",kernelFunc:hpe};var gpe=ye({opType:Z.LOG1P}),SW={kernelName:Pn,backendName:"webgpu",kernelFunc:gpe};var xpe=et({opType:fe.LOGICAL_AND,dtype:"bool"}),IW={kernelName:On,backendName:"webgpu",kernelFunc:xpe};var ype=ye({opType:Z.LOGICAL_NOT}),vW={kernelName:Mn,backendName:"webgpu",kernelFunc:ype};var bpe=et({opType:fe.LOGICAL_OR}),kW={kernelName:Ln,backendName:"webgpu",kernelFunc:bpe};var NW=`
2023-01-06 19:23:06 +01:00
var powValue = 0.0;
let basis = uniforms.bias + uniforms.alpha * sum;
if (uniforms.beta == 0.5) {
powValue = inverseSqrt(basis);
} else if (uniforms.beta == 1.0) {
powValue = 1.0 / basis;
} else {
powValue = exp(log(basis) * (-uniforms.beta));
}
`,Gx=class{constructor(t){this.outputShape=[],this.variableNames=["x"],this.uniforms="radius : i32, bias : f32, alpha : f32, beta : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="lrn"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-01-06 19:23:06 +01:00
if (index < uniforms.size) {
let coords = getOutputCoords();
let b = coords[0];
let r = coords[1];
let c = coords[2];
let d = coords[3];
let x = getX(b, r, c, d);
var sum = 0.0;
for (var i = -uniforms.radius; i <= uniforms.radius; i = i + 1) {
let idx = d + i;
if (idx >= 0 && idx < uniforms.xShape[3]) {
let z = getX(b, r, c, idx);
sum = sum + z * z;
}
}
${NW}
2023-01-06 19:23:06 +01:00
setOutputAtIndex(index, x * powValue);
}
}
`}},Hx=class{constructor(t,e){this.outputShape=[],this.variableNames=["x"],this.uniforms="radius : i32, bias : f32, alpha : f32, beta : f32,",this.workgroupSize=[256,1,1],this.maxAllowRadius=16,y.assert(e<=this.maxAllowRadius,()=>`Radius must be less than or equal to ${this.maxAllowRadius}, current radius is ${e}`),this.outputShape=t,this.elementsPerWorkgroup=this.workgroupSize[0]-2*this.maxAllowRadius,this.dispatchLayout={x:[3],y:[2],z:[0,1]},this.dispatch=H(this.dispatchLayout,this.outputShape,[this.elementsPerWorkgroup,this.workgroupSize[1],this.workgroupSize[2]]),this.shaderKey="lrn_shared"}getUserCode(){return`
2023-01-06 19:23:06 +01:00
var <workgroup>lrnSub: array<f32, ${this.workgroupSize[0]}>;
const elementsPerWorkgroup = ${this.elementsPerWorkgroup};
const maxAllowRadius = ${this.maxAllowRadius};
2023-08-05 15:03:11 +02:00
${G()} {
2023-01-06 19:23:06 +01:00
let localDepth = i32(localId.x);
let workgroupDepth = i32(workgroupId.x) * elementsPerWorkgroup;
let xDepth = workgroupDepth + localDepth - maxAllowRadius;
let b = i32(globalId.z) / uniforms.xShape[1];
let r = i32(globalId.z) - b * uniforms.xShape[1];
let c = i32(globalId.y);
let d = workgroupDepth + localDepth;
var x = 0.0;
if (xDepth >= 0 && xDepth < uniforms.xShape[3]) {
x = getX(b, r, c, xDepth);
}
lrnSub[localDepth] = x;
workgroupBarrier();
if (localDepth < elementsPerWorkgroup && d < uniforms.outShape[3]) {
var sum = 0.0;
let index = localDepth + maxAllowRadius;
for (var i = -uniforms.radius; i <= uniforms.radius; i = i + 1) {
let z = lrnSub[index + i];
sum = sum + z * z;
}
${NW}
2023-01-06 19:23:06 +01:00
setOutputAtCoords(b, r, c, d, lrnSub[index] * powValue);
}
} `}};function Cpe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{depthRadius:s,bias:a,alpha:i,beta:p}=o,u;s>16?u=new Gx(n.shape):u=new Hx(n.shape,s);let c=[{type:"int32",data:[s]},{type:"float32",data:[a]},{type:"float32",data:[i]},{type:"float32",data:[p]}];return e.runWebGPUProgram(u,[n],n.dtype,c)}var TW={kernelName:Bn,backendName:"webgpu",kernelFunc:Cpe};var Kx=class{constructor(t){this.outputShape=[],this.variableNames=["inputImage","outputImage","dy"],this.uniforms="depthRadius : i32, bias : f32, alpha : f32, beta : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="lrn_grad"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getOutputCoords();
let b = coords[0];
let r = coords[1];
let c = coords[2];
let MIN_DEPTH_BEGIN = 0;
let MAX_DEPTH_END = uniforms.outShape[3];
var result = 0.0;
for (var d = MIN_DEPTH_BEGIN; d < MAX_DEPTH_END; d++) {
let depthBegin = max(MIN_DEPTH_BEGIN, d - uniforms.depthRadius);
let depthEnd = min(MAX_DEPTH_END, d + uniforms.depthRadius + 1);
var norm = 0.0;
for (var k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; k++) {
if (k < depthBegin) {
continue;
} else if (k >= depthBegin && k < depthEnd) {
norm += getInputImage(b, r, c, k) * getInputImage(b, r, c, k);
} else {
break;
}
}
norm = uniforms.alpha * norm + uniforms.bias;
for (var k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; k++) {
if (k < depthBegin) {
continue;
} else if (k >= depthBegin && k < depthEnd) {
var dyi = -2.0 * uniforms.alpha * uniforms.beta
* getInputImage(b, r, c, k) * getOutputImage(b, r, c, d) / norm;
if (k == d) {
dyi += pow(norm, -1.0 * uniforms.beta);
}
if (k == coords[3]) {
dyi *= getDy(b, r, c, d);
result += dyi;
}
} else {
break;
}
}
}
setOutputAtIndex(index, result);
}
}
`}};function wpe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,y:s,dy:a}=t,{depthRadius:i,bias:p,alpha:u,beta:c}=o,l=new Kx(n.shape),m=[{type:"int32",data:[i]},{type:"float32",data:[p]},{type:"float32",data:[u]},{type:"float32",data:[c]}];return e.runWebGPUProgram(l,[n,s,a],n.dtype,m)}var _W={kernelName:Ya,backendName:"webgpu",kernelFunc:wpe};var Spe=et({opType:fe.MAX,cpuKernelImpl:Iz}),$W={kernelName:Vn,backendName:"webgpu",kernelFunc:Spe};function Ipe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1,c=w.computePool2DInfo(n.shape,s,a,u,i,p);return sx(n,c,"max",e)}var EW={kernelName:Wn,backendName:"webgpu",kernelFunc:Ipe};function vpe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{filterSize:s,strides:a,pad:i,dataFormat:p,dimRoundingMode:u}=o,c=[1,1,1],l=w.computePool3DInfo(n.shape,s,a,c,i,u,p),m=new wu(l,"max"),d=[{type:"int32",data:[l.strideDepth,l.strideHeight,l.strideWidth]},{type:"int32",data:[l.padInfo.front,l.padInfo.top,l.padInfo.left]},{type:"int32",data:[l.inDepth,l.inHeight,l.inWidth]},{type:"int32",data:[l.effectiveFilterDepth,l.effectiveFilterHeight,l.effectiveFilterWidth]}];return e.runWebGPUProgram(m,[n],n.dtype,d)}var RW={kernelName:ia,backendName:"webgpu",kernelFunc:vpe};var qx=class{constructor(t){this.variableNames=["dy","maxPos"],this.uniforms=`strides : vec2<i32>, pads : vec2<i32>, dilations : vec2<i32>, filterDims : vec2<i32>,
2023-08-05 15:03:11 +02:00
outHeight : i32, outWidth : i32`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.inShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="maxPool2DBackprop"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d = coords[3];
let dyRCCorner = vec2<i32>(coords.yz) - uniforms.pads;
let dyRCorner = dyRCCorner.x;
let dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
let lastIndex = uniforms.filterDims[0] * uniforms.filterDims[1] - 1;
for (var wR = 0; wR < uniforms.filterDims[0]; wR += uniforms.dilations[0]) {
let dyR = f32(dyRCorner + wR) / f32(uniforms.strides[0]);
if (dyR < 0.0 || dyR >= f32(uniforms.outHeight) || fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims[1]; wC += uniforms.dilations[1]) {
let dyC = f32(dyCCorner + wC) / f32(uniforms.strides[1]);
if (dyC < 0.0 || dyC >= f32(uniforms.outWidth) || fract(dyC) > 0.0) {
continue;
}
let idyC = i32(dyC);
let dyValue = getDy(batch, idyR, idyC, d);
let maxPosValue = lastIndex - i32(getMaxPos(batch, idyR, idyC, d));
// Get the current value, check it against the value from the
// position matrix.
let curPosValue = wR * uniforms.filterDims[1] + wC;
let mask = select(0.0, 1.0, maxPosValue == curPosValue);
dotProd += dyValue * mask;
}
}
setOutputAtIndex(index, dotProd);
}
}
`}},jx=class{constructor(t){this.variableNames=["dy","maxPos"],this.uniforms=`strides : vec3<i32>, pads : vec3<i32>, filterDims : vec3<i32>,
2023-08-05 15:03:11 +02:00
outDepth : i32, outHeight : i32, outWidth : i32`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.inShape,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="maxPool3DBackprop"}getUserCode(){return`
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords.x;
let ch = coords.u;
let dyCorner = vec3<i32>(coords.y, coords.z, coords.w) - uniforms.pads;
let dyDCorner = dyCorner.x;
let dyRCorner = dyCorner.y;
let dyCCorner = dyCorner.z;
// Convolve dy(?, ?, ?, ch) with pos mask(:, :, :, d) to get
// dx(xD, xR, xC, ch).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
let lastIndex = uniforms.filterDims[0] * uniforms.filterDims[1] * uniforms.filterDims[2] - 1;
for (var wD = 0; wD < uniforms.filterDims[0]; wD++) {
let dyD = f32(dyDCorner + wD) / f32(uniforms.strides[0]);
if (dyD < 0.0 || dyD >= f32(uniforms.outDepth) || fract(dyD) > 0.0) {
continue;
}
let idyD = i32(dyD);
for (var wR = 0; wR < uniforms.filterDims[1]; wR++) {
let dyR = f32(dyRCorner + wR) / f32(uniforms.strides[1]);
if (dyR < 0.0 || dyR >= f32(uniforms.outHeight) || fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims[2]; wC++) {
let dyC = f32(dyCCorner + wC) / f32(uniforms.strides[2]);
if (dyC < 0.0 || dyC >= f32(uniforms.outWidth) || fract(dyC) > 0.0) {
continue;
}
let idyC = i32(dyC);
let dyValue = getDy(batch, idyD, idyR, idyC, ch);
let maxPosValue = lastIndex - i32(getMaxPos(batch, idyD, idyR, idyC, ch));
// Get the current value, check it against the value from the
// position matrix.
let curPosValue = wD * uniforms.filterDims[1] * uniforms.filterDims[2] + wR * uniforms.filterDims[2] + wC;
let mask = select(0.0, 1.0, maxPosValue == curPosValue);
dotProd += dyValue * mask;
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}};function kpe(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s}=t,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=[1,1,1],m=w.computePool3DInfo(a.shape,i,p,l,u,c),d=new wu(m,"max",!0),f=[{type:"int32",data:[m.strideDepth,m.strideHeight,m.strideWidth]},{type:"int32",data:[m.padInfo.front,m.padInfo.top,m.padInfo.left]},{type:"int32",data:[m.inDepth,m.inHeight,m.inWidth]},{type:"int32",data:[m.effectiveFilterDepth,m.effectiveFilterHeight,m.effectiveFilterWidth]}],h=e.runWebGPUProgram(d,[a],"int32",f),g=new jx(m);f=[{type:"int32",data:[m.strideDepth,m.strideHeight,m.strideWidth]},{type:"int32",data:[m.effectiveFilterDepth-1-m.padInfo.front,m.effectiveFilterHeight-1-m.padInfo.top,m.effectiveFilterWidth-1-m.padInfo.left]},{type:"int32",data:[m.effectiveFilterDepth,m.effectiveFilterHeight,m.effectiveFilterWidth]},{type:"int32",data:[m.outDepth]},{type:"int32",data:[m.outHeight]},{type:"int32",data:[m.outWidth]}];let x=e.runWebGPUProgram(g,[n,h],a.dtype,f);return e.disposeData(h.dataId),x}var DW={kernelName:Ui,backendName:"webgpu",kernelFunc:kpe};function Npe(r){let{inputs:t,backend:e,attrs:o}=r,{dy:n,input:s,output:a}=t,i=s;mm([s,a],"maxPoolGrad");let{filterSize:p,strides:u,pad:c,dimRoundingMode:l}=o,m=w.computePool2DInfo(i.shape,p,u,1,c,l),d=new Ba(m,"max",!0),f=[{type:"int32",data:[m.strideHeight,m.strideWidth]},{type:"int32",data:[m.padInfo.top,m.padInfo.left]},{type:"int32",data:[m.dilationHeight,m.dilationWidth]},{type:"int32",data:[m.inHeight,m.inWidth]},{type:"int32",data:[m.effectiveFilterHeight,m.effectiveFilterWidth]}],h=e.runWebGPUProgram(d,[i],"int32",f),g=new qx(m);f=[{type:"int32",data:[m.strideHeight,m.strideWidth]},{type:"int32",data:[m.effectiveFilterHeight-1-m.padInfo.top,m.effectiveFilterWidth-1-m.padInfo.left]},{type:"int32",data:[m.dilationHeight,m.dilationWidth]},{type:"int32",data:[m.effectiveFilterHeight,m.effectiveFilterWidth]},{type:"int32",data:[m.outHeight]},{type:"int32",data:[m.outWidth]}];let 
x=e.runWebGPUProgram(g,[n,h],i.dtype,f);return e.disposeData(h.dataId),x}var AW={kernelName:Wi,backendName:"webgpu",kernelFunc:Npe};function Tpe(r){let{inputs:t,backend:e,attrs:o}=r,{filterSize:n,strides:s,pad:a,includeBatchInIndex:i}=o,{x:p}=t;y.assert(p.shape.length===4,()=>`Error in maxPool: input must be rank 4 but got rank ${p.shape.length}.`);let u=[1,1];y.assert(w.eitherStridesOrDilationsAreOne(s,u),()=>`Error in maxPool: Either strides or dilations must be 1. Got strides ${s} and dilations '${u}'`);let c=w.computePool2DInfo(p.shape,n,s,u,a),l=[{type:"int32",data:[c.strideHeight,c.strideWidth]},{type:"int32",data:[c.padInfo.top,c.padInfo.left]},{type:"int32",data:[c.dilationHeight,c.dilationWidth]},{type:"int32",data:[c.inHeight,c.inWidth]},{type:"int32",data:[c.effectiveFilterHeight,c.effectiveFilterWidth]}],m=new Ba(c,"max",!1),d=e.runWebGPUProgram(m,[p],p.dtype,l);m=new Ba(c,"max",!0,!0,i);let f=e.runWebGPUProgram(m,[p],"int32",l);return[d,f]}var FW={kernelName:ua,backendName:"webgpu",kernelFunc:Tpe};function _pe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o;return Jr(n,s,a,"min",e)}var PW={kernelName:Gn,backendName:"webgpu",kernelFunc:_pe};var $pe=et({opType:fe.MIN,cpuKernelImpl:vz}),OW={kernelName:Hn,backendName:"webgpu",kernelFunc:$pe};var Xx=class{constructor(t,e,o){this.uniforms="",this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e.map((n,s)=>n[0]+t[s]+n[1]),this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.xShape=t,e.map((n,s)=>{this.uniforms+=` pad${s} : vec2<i32>,`}),this.offset=o==="reflect"?0:1,this.shaderKey=`mirrorPad_${o}`}getUserCode(){let t=this.xShape.length,e=this.xShape.map((u,c)=>`uniforms.pad${c}[0]`).join(","),o=this.xShape.map((u,c)=>`uniforms.pad${c}[0] + 
uniforms.xShape${t>1?`[${c}]`:""}`).join(","),n=t===1?"start":"start[i]",s=t===1?"end":"end[i]",a=t===1?"outC":"outC[i]",i=ft(t),p=t>1?["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,t):"coords";return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
2023-08-05 15:03:11 +02:00
let start = ${i}(${e});
2022-11-18 17:13:29 +01:00
let end = ${i}(${o});
var outC = getCoordsFromIndex(index);
2023-08-05 15:03:11 +02:00
for (var i = 0; i < ${t}; i = i + 1) {
2022-11-18 17:13:29 +01:00
if (${a} < ${n}) {
${a} = ${n} * 2 - ${a} - ${this.offset};
} else if(${a} >= ${s}) {
${a} = (${s} - 1) * 2 - ${a} + ${this.offset};
}
}
let coords = outC - start;
setOutputAtIndex(index, getX(${p}));
}
}
`}};var MW={kernelName:Kn,backendName:"webgpu",kernelFunc:({inputs:r,attrs:t,backend:e})=>{let{x:o}=r,{paddings:n,mode:s}=t,a=e,i=n.map(c=>({type:"int32",data:[c[0],c[1]]})),p=new Xx(o.shape,n,s);return a.runWebGPUProgram(p,[o],o.dtype,i)}};var Epe=et({opType:fe.MOD}),LW={kernelName:qn,backendName:"webgpu",kernelFunc:Epe};var Yx=class{constructor(t,e){this.variableNames=["probs"],this.outputShape=[],this.uniforms="seed : f32, numOutcomes: i32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[t,e],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="multinomial"}getUserCode(){return`
2023-05-08 15:12:41 +02:00
//Based on the work of Dave Hoskins
//https://www.shadertoy.com/view/4djSRW
fn random (seed : f32, resultUV : vec2<f32>) -> f32 {
let HASHSCALE1 = 443.8975;
let p = resultUV * seed;
var p3 = fract(vec3<f32>(p.xyx) * HASHSCALE1);
p3 = p3 + dot(p3, p3.yzx + 19.19);
return fract((p3.x + p3.y) * p3.z);
}
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
if (index < uniforms.size) {
let coords = getOutputCoords();
let batch = coords[0];
let resUV = vec2<f32>(f32(coords[1]) / f32(uniforms.outShape[1]),
f32(coords[0]) / f32(uniforms.outShape[0]));
let r = random(uniforms.seed, resUV);
var cdf = 0.0;
for (var i = 0; i < uniforms.numOutcomes - 1; i = i + 1) {
cdf = cdf + getProbs(batch, i);
if (r < cdf) {
setOutputAtIndexI32(index, i);
return;
}
}
// If no other event happened, last event happened.
setOutputAtIndexI32(index, uniforms.numOutcomes - 1);
}
}
`}};var Qx=class{constructor(t){this.variableNames=["logits"],this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=[this.outputShape[0],1,1],this.outputShape[1]>=4096?this.workgroupSize=[256,1,1]:this.workgroupSize=[64,1,1],this.shaderKey="softmax"}getUserCode(){return`
2023-05-08 15:12:41 +02:00
var<workgroup> buf : array<f32, ${this.workgroupSize[0]}>;
var<workgroup> rowMaxShared : f32;
var<workgroup> rowSumShared : f32;
const blockSize = ${this.workgroupSize[0]};
2023-08-05 15:03:11 +02:00
${G("index")} {
2023-05-08 15:12:41 +02:00
let row = index / blockSize;
let tid = i32(localId.x);
let cols = uniforms.outShape[1];
var threadMax = -3.402823e+38f;
for (var col = tid; col < cols; col += blockSize) {
let value = getLogits(row, col);
threadMax = max(threadMax, value);
}
if (tid < cols) {
buf[tid] = threadMax;
}
workgroupBarrier();
var reduceSize = min(cols, blockSize);
for (var currSize = reduceSize >> 1; currSize > 0; currSize = reduceSize >> 1) {
reduceSize = currSize + (reduceSize & 1);
if (tid < currSize) {
buf[tid] = max(buf[tid], buf[tid + reduceSize]);
}
workgroupBarrier();
}
if (tid == 0) {
rowMaxShared = buf[0];
}
workgroupBarrier();
var threadSum = 0.0;
for (var col = tid; col < cols; col += blockSize) {
let subExp = exp(getLogits(row, col) - rowMaxShared);
threadSum += subExp;
}
buf[tid] = threadSum;
workgroupBarrier();
for (var currSize = blockSize >> 1; currSize > 0; currSize = currSize >> 1) {
if (tid < currSize) {
buf[tid] = buf[tid] + buf[tid + currSize];
}
workgroupBarrier();
}
if (tid == 0) {
rowSumShared = buf[0];
}
workgroupBarrier();
for (var col = tid; col < cols; col += blockSize) {
let value = exp(getLogits(row, col) - rowMaxShared) / rowSumShared;
setOutputAtCoords(row, col, value);
}
}
`}};function c0(r){let{inputs:t,backend:e,attrs:o}=r,{logits:n}=t,{dim:s}=o,a=pe({inputs:{x:n},backend:e,attrs:{shape:[y.sizeFromShape(n.shape)/n.shape[s],n.shape[s]]}}),i=new Qx(a.shape),p=e.runWebGPUProgram(i,[a],n.dtype),u=pe({inputs:{x:p},backend:e,attrs:{shape:n.shape}});return e.disposeData(a.dataId),e.disposeData(p.dataId),u}var BW={kernelName:Is,backendName:"webgpu",kernelFunc:c0};function Rpe(r){let{inputs:t,backend:e,attrs:o}=r,{logits:n}=t,{numSamples:s,seed:a,normalized:i}=o,p=i?n:c0({inputs:{logits:n},backend:e,attrs:{dim:n.shape.length-1}}),u=p.shape[0],c=p.shape[1],l=new Yx(u,s),m=[{type:"float32",data:[a]},{type:"int32",data:[c]}],d=e.runWebGPUProgram(l,[p],"int32",m);return i||e.disposeData(p.dataId),d}var zW={kernelName:jn,backendName:"webgpu",kernelFunc:Rpe};function Dpe(r){let{inputs:t,backend:e}=r,{x:o}=t;if(e.shouldExecuteOnCPU([o])){let s=e.tensorMap.get(o.dataId),[a,i]=Nz(s.values,o.shape,o.dtype);return e.makeTensorInfo(i,o.dtype,a)}let n=new Zr(o.shape,Z.NEG);return e.runWebGPUProgram(n,[o],o.dtype)}var VW={kernelName:pa,backendName:"webgpu",kernelFunc:Dpe};function Ape(r){console.warn("tf.nonMaxSuppression() in webgpu locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");let{inputs:t,backend:e,attrs:o}=r,{boxes:n,scores:s}=t,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p}=o,u=e.readSync(n.dataId),c=e.readSync(s.dataId),{selectedIndices:l}=Vt.nonMaxSuppressionV3Impl(u,c,a,i,p);return e.makeTensorInfo([l.length],"int32",new Int32Array(l))}var WW={kernelName:Qn,backendName:"webgpu",kernelFunc:Ape};function Fpe(r){console.warn("tf.nonMaxSuppression() in webgpu locks the UI thread. 
Call tf.nonMaxSuppressionAsync() instead");let{inputs:t,backend:e,attrs:o}=r,{boxes:n,scores:s}=t,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p,softNmsSigma:u}=o,c=e.readSync(n.dataId),l=e.readSync(s.dataId),m=a,d=i,f=p,h=u,{selectedIndices:g,selectedScores:x}=Vt.nonMaxSuppressionV5Impl(c,l,m,d,f,h);return[e.makeTensorInfo([g.length],"int32",new Int32Array(g)),e.makeTensorInfo([x.length],"float32",new Float32Array(x))]}var UW={kernelName:Zn,backendName:"webgpu",kernelFunc:Fpe};var Zx=class{constructor(t,e){this.variableNames=["x"],this.uniforms="onValue : f32, offValue : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[t,e],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="onehot"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-20 22:20:02 +01:00
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
setOutputAtIndex(index, mix(uniforms.offValue, uniforms.onValue,
f32(i32(round(getX(coords.x))) == coords.y)));
}
}
`}};function Ppe(r){let{inputs:t,backend:e,attrs:o}=r,{indices:n}=t,{dtype:s,depth:a,onValue:i,offValue:p}=o,u=y.sizeFromShape(n.shape),c=new Zx(u,a),l=pe({inputs:{x:n},backend:e,attrs:{shape:[u]}}),m=[{type:"float32",data:[i]},{type:"float32",data:[p]}],d=e.runWebGPUProgram(c,[l],s,m);e.disposeData(l.dataId);let f=[...n.shape,a],h=pe({inputs:{x:d},backend:e,attrs:{shape:f}});return e.disposeData(d.dataId),h}var GW={kernelName:Jn,backendName:"webgpu",kernelFunc:Ppe};function xm(r){let{inputs:t,backend:e}=r,{x:o}=t;if(o.dtype==="complex64"){let n=Ii({inputs:{input:o},backend:e}),s=xm({inputs:{x:n},backend:e}),a=$p({inputs:{input:o},backend:e}),i=xm({inputs:{x:a},backend:e}),p=xo({inputs:{real:s,imag:i},backend:e});return e.disposeData(n.dataId),e.disposeData(s.dataId),e.disposeData(a.dataId),e.disposeData(i.dataId),p}else return vt({attrs:{shape:o.shape,dtype:o.dtype,value:o.dtype==="string"?"":0},backend:e})}var HW={kernelName:Sa,backendName:"webgpu",kernelFunc:xm};function KW(r){let{inputs:t,backend:e}=r,{x:o}=t;if(o.dtype==="string")throw new Error("onesLike is not supported under string dtype");if(o.dtype==="complex64"){let n=Ii({inputs:{input:o},backend:e}),s=KW({inputs:{x:n},backend:e}),a=$p({inputs:{input:o},backend:e}),i=xm({inputs:{x:a},backend:e}),p=xo({inputs:{real:s,imag:i},backend:e});return e.disposeData(n.dataId),e.disposeData(s.dataId),e.disposeData(a.dataId),e.disposeData(i.dataId),p}else return vt({attrs:{shape:o.shape,dtype:o.dtype,value:1},backend:e})}var qW={kernelName:ca,backendName:"webgpu",kernelFunc:KW};function Ope(r){let{inputs:t,backend:e,attrs:o}=r,{axis:n}=o;if(t.length===1)return Ox({inputs:{input:t[0]},backend:e,attrs:{dim:n}});let s=t[0].shape,a=t[0].dtype;t.forEach(c=>{y.assertShapesMatch(s,c.shape,"All tensors passed to stack must have matching shapes"),y.assert(a===c.dtype,()=>"All tensors passed to stack must have matching dtypes")});let i=[],p=t.map(c=>{let l=Ox({inputs:{input:c},backend:e,attrs:{dim:n}});return 
i.push(l),l}),u=s0({inputs:p,backend:e,attrs:{axis:n}});return i.forEach(c=>e.disposeData(c.dataId)),u}var jW={kernelName:la,backendName:"webgpu",kernelFunc:Ope};function l0(r,t=!1){let e=r.length,o=ft(e),n=r.map((l,m)=>`uniforms.pad${m}[0]`).join(","),s=r.map((l,m)=>`uniforms.pad${m}[0] + uniforms.xShape${e>1?`[${m}]`:""}`).join(","),a=e>1?`${o}(${n})`:`${n}`,i=e>1?`${o}(${s})`:`${s}`,p=e>1?"any(paddedCoords < start)":"paddedCoords < start",u=e>1?"any(paddedCoords >= end)":"paddedCoords >= end",c=e>1?["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,e):"coords";return`
let start = ${a};
let end = ${i};
if (${p} || ${u}) {
2023-08-05 15:03:11 +02:00
setOutputAtIndex(index, ${t?0:"uniforms.constantValue"});
} else {
let coords = paddedCoords - start;
setOutputAtIndex(index, getX(${c}));
}
`}var Jx=class{constructor(t,e){this.variableNames=["x"],this.uniforms="constantValue : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e.map((o,n)=>o[0]+t[n]+o[1]),this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),e.map((o,n)=>{this.uniforms+=` pad${n} : vec2<i32>,`}),this.xShape=t,this.shaderKey="pad"}getUserCode(){return`
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let paddedCoords = getCoordsFromIndex(index);
${l0(this.xShape)}
2022-11-18 17:13:29 +01:00
}
}
`}};var Mpe=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{paddings:s,constantValue:a}=o;if(s.every(u=>y.arraysEqual(u,[0,0])))return At({inputs:{x:n},backend:e});if(y.sizeFromShape(n.shape)===0){let u=s.map((c,l)=>c[0]+n.shape[l]+c[1]);return vt({backend:e,attrs:{shape:u,value:a,dtype:n.dtype}})}let i=[{type:"float32",data:[a]}];s.map(u=>i.push({type:"int32",data:[u[0],u[1]]}));let p=new Jx(n.shape,s);return e.runWebGPUProgram(p,[n],n.dtype,i)},XW={kernelName:es,backendName:"webgpu",kernelFunc:Mpe};var Lpe=et({opType:fe.POW}),YW={kernelName:ts,backendName:"webgpu",kernelFunc:Lpe};function Bpe(r){let{inputs:t,backend:e}=r,{x:o,alpha:n}=t,s=new Si(fe.PRELU,o.shape,n.shape);return e.runWebGPUProgram(s,[o,n],"float32")}var QW={kernelName:rs,backendName:"webgpu",kernelFunc:Bpe};function zpe(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{axis:s,keepDims:a}=o;return Jr(n,s,a,"prod",e)}var ZW={kernelName:os,backendName:"webgpu",kernelFunc:zpe};var Vpe=r=>{let{backend:t,attrs:e}=r,{start:o,stop:n,step:s,dtype:a}=e,i=$z(o,n,s,a);return t.makeTensorInfo([i.length],a,i)},JW={kernelName:ma,backendName:"webgpu",kernelFunc:Vpe};var Wpe=et({opType:fe.DIV}),eU={kernelName:fn,backendName:"webgpu",kernelFunc:Wpe};var Upe=ye({opType:Z.RECIPROCAL}),tU={kernelName:ns,backendName:"webgpu",kernelFunc:Upe};var Gpe=ye({opType:Z.RELU}),rU={kernelName:ss,backendName:"webgpu",kernelFunc:Gpe};var Hpe=ye({opType:Z.RELU6}),oU={kernelName:us,backendName:"webgpu",kernelFunc:Hpe};var ey=class{constructor(t,e,o){this.variableNames=["x"],this.uniforms="adjustHeightWidth : vec2<f32>, halfPixelCenters : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[t[0],e,o,t[3]],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="resizeBilinear"}getUserCode(){return`
${G("index")} {
  // Bilinear resize. One invocation per output element; indices at or beyond
  // uniforms.size fall through without writing anything.
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    // coords[0] and coords[3] are forwarded to getX unchanged; coords.yz are
    // the two resized dimensions (row, column).
    let b = coords[0];
    let d = coords[3];
    let rc = coords.yz;

    // adjustHeightWidth is a host-supplied per-axis correction that is
    // subtracted from both the input and the output extent before the
    // input/output ratio is formed.
    let effectiveInSize = vec2<f32>(
        f32(uniforms.xShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.xShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveOutSize = vec2<f32>(
        f32(uniforms.outShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.outShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveInputOverOutputRatioRC =
        effectiveInSize / effectiveOutSize;

    // Fractional source index
    let sourceFracIndexRC =
        (vec2<f32>(rc) + vec2<f32>(uniforms.halfPixelCenters)) *
        effectiveInputOverOutputRatioRC - vec2<f32>(uniforms.halfPixelCenters);

    // Compute the four integer indices.
    // The ceil index is clamped to the last valid input row/column.
    let sourceFloorRC = vec2<i32>(sourceFracIndexRC);
    let sourceCeilRC = vec2<i32>(
        min(vec2<f32>(uniforms.xShape.yz) - vec2<f32>(1.0), ceil(sourceFracIndexRC)));

    // In the RC vectors .x is the row index and .y is the column index.
    let topLeft = getX(b, sourceFloorRC.x, sourceFloorRC.y, d);
    let bottomLeft = getX(b, sourceCeilRC.x, sourceFloorRC.y, d);
    let topRight = getX(b, sourceFloorRC.x, sourceCeilRC.y, d);
    let bottomRight = getX(b, sourceCeilRC.x, sourceCeilRC.y, d);

    // Blend along the column axis first, then along the row axis.
    let fracRC = sourceFracIndexRC - vec2<f32>(sourceFloorRC);

    let top = topLeft + (topRight - topLeft) * fracRC.y;
    let bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y;
    let newValue = top + (bottom - top) * fracRC.x;

    setOutputAtIndex(index, newValue);
  }
}
`}};function Kpe(r){let{inputs:t,backend:e,attrs:o}=r,{images:n}=t,{alignCorners:s,size:a,halfPixelCenters:i}=o,[p,u]=a,c=s&&p>1?1:0,l=s&&u>1?1:0,d=[{type:"float32",data:[c,l]},{type:"float32",data:[i?.5:0]}],f=new ey(n.shape,p,u);return e.runWebGPUProgram(f,[n],"float32",d)}var nU={kernelName:is,backendName:"webgpu",kernelFunc:Kpe};var ty=class{constructor(t,e){this.variableNames=["dy"],this.uniforms=`effectiveXSize : vec2<i32>, effectiveYSize : vec2<i32>, heightScale : f32, widthScale : f32,
2023-08-05 15:03:11 +02:00
invHeightScale : f32, invWidthScale : f32, winHeight : i32, winWidth : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.alignCorners=e,this.shaderKey=`resizeBilinearBackprop_${e}`}getUserCode(){return`
${G("index")} {
  // Gradient of bilinear resize. Each invocation owns one output element at
  // (b, r, c, d) and sums the weighted contributions of every dy element
  // inside a host-sized search window.
  if (index < uniforms.size) {
    let coords = getOutputCoords();
    let b = coords[0];
    let d = coords[3];
    let r = coords[1];
    let c = coords[2];

    var accumulator = 0.0;

    // Compute bounds for where in dy we will look
    let startRLerp = floor(f32(r) * uniforms.invHeightScale);
    let startDyR = i32(startRLerp - f32(uniforms.winHeight / 2));

    let startCLerp = floor(f32(c) * uniforms.invWidthScale);
    let startDyC = i32(startCLerp - f32(uniforms.winWidth / 2));

    // Loop over dy
    for (var dyROffset = 0; dyROffset < uniforms.winHeight; dyROffset++) {
      let dyR = startDyR + dyROffset;

      // Guard against the window exceeding the bounds of dy
      if (dyR < 0 || dyR >= uniforms.dyShape[1]) {
        continue;
      }

      for (var dyCOffset = 0; dyCOffset < uniforms.winWidth; dyCOffset++) {
        let dyC = startDyC + dyCOffset;

        // Guard against the window exceeding the bounds of dy
        if (dyC < 0 || dyC >= uniforms.dyShape[2]) {
          continue;
        }

        // Map the dy row back to a fractional row of this output, then derive
        // the two neighbouring rows and their interpolation weights.
        let dxR = f32(dyR) * uniforms.heightScale;
        let topDxRIndex = i32(floor(dxR));
        let bottomDxRIndex = i32(min(ceil(dxR), f32(uniforms.outShape[1] - 1)));
        let dxRLerp = dxR - f32(topDxRIndex);
        let inverseDxRLerp = 1.0 - dxRLerp;

        // Same mapping for the column axis.
        let dxC = f32(dyC) * uniforms.widthScale;
        let leftDxCIndex = i32(floor(dxC));
        let rightDxCIndex = i32(min(ceil(dxC), f32(uniforms.outShape[2] - 1)));
        let dxCLerp = dxC - f32(leftDxCIndex);
        let inverseDxCLerp = 1.0 - dxCLerp;

        // The four tests are independent on purpose: when floor and ceil
        // coincide, (r, c) matches several corners and receives each weight.
        if (r == topDxRIndex && c == leftDxCIndex) {
          // topLeft
          accumulator +=
              getDy(b, dyR, dyC, d) * inverseDxRLerp * inverseDxCLerp;
        }

        if (r == topDxRIndex && c == rightDxCIndex) {
          // topRight
          accumulator += getDy(b, dyR, dyC, d) * inverseDxRLerp * dxCLerp;
        }

        if (r == bottomDxRIndex && c == leftDxCIndex) {
          // bottomLeft
          accumulator += getDy(b, dyR, dyC, d) * dxRLerp * inverseDxCLerp;
        }

        if (r == bottomDxRIndex && c == rightDxCIndex) {
          // bottomRight
          accumulator += getDy(b, dyR, dyC, d) * dxRLerp * dxCLerp;
        }
      }
    }
    // End loop over dy

    setOutputAtIndex(index, accumulator);
  }
}
`}};function qpe(r){let{inputs:t,backend:e,attrs:o}=r,{images:n,dy:s}=t,{alignCorners:a}=o,[,i,p]=n.shape,[,u,c]=s.shape,l=[a&&u>1?i-1:i,a&&c>1?p-1:p],m=[a&&u>1?u-1:u,a&&c>1?c-1:c],d=l[0]/m[0],f=l[1]/m[1],h=1/d,g=1/f,x=Math.ceil(h)*2+2,b=Math.ceil(g)*2+2,C=new ty(n.shape,a),S=[{type:"int32",data:l},{type:"int32",data:m},{type:"float32",data:[d]},{type:"float32",data:[f]},{type:"float32",data:[h]},{type:"float32",data:[g]},{type:"int32",data:[x]},{type:"int32",data:[b]}];return e.runWebGPUProgram(C,[s],s.dtype,S)}var sU={kernelName:Ja,backendName:"webgpu",kernelFunc:qpe};var ry=class{constructor(t,e,o,n){this.variableNames=["x"],this.uniforms="adjustHeightWidth : vec2<f32>, roundBase : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[t[0],e,o,t[3]],this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.halfPixelCenters=n,this.shaderKey=`resizeNearest_${n}`}getUserCode(){let t;return this.halfPixelCenters?t="max((vec2<f32>(rc) + vec2<f32>(0.5)) * effectiveInputOverOutputRatioRC, vec2<f32>(0.0))":t="vec2<f32>(rc) * effectiveInputOverOutputRatioRC",`
${G("index")} {
  // Nearest-neighbour resize. One invocation per output element; indices at
  // or beyond uniforms.size fall through without writing anything.
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    // coords[0] and coords[3] are forwarded to getX unchanged; coords.yz are
    // the two resized dimensions (row, column).
    let b = coords[0];
    let d = coords[3];
    let rc = coords.yz;

    // adjustHeightWidth is a host-supplied per-axis correction that is
    // subtracted from both the input and the output extent before the
    // input/output ratio is formed.
    let effectiveInSize = vec2<f32>(
        f32(uniforms.xShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.xShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveOutSize = vec2<f32>(
        f32(uniforms.outShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.outShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveInputOverOutputRatioRC =
        effectiveInSize / effectiveOutSize;

    // Fractional source index
    // The expression is chosen on the host by getUserCode(): the
    // halfPixelCenters variant offsets rc by 0.5 and clamps the result at 0.
    let sourceFracIndexRC = ${t};

    // Compute the coordinates of the nearest neighbor point.
    // roundBase is added before floor() and the result is clamped to the
    // last valid input row/column.
    let inputShapeRC = vec2<f32>(f32(uniforms.xShape.y), f32(uniforms.xShape.z));
    let sourceNearestRC = vec2<i32>(
        min(inputShapeRC - 1.0, floor(sourceFracIndexRC + uniforms.roundBase)));
    let newValue = getX(b, sourceNearestRC.x, sourceNearestRC.y, d);

    setOutputAtIndex(index, newValue);
  }
}
`}};function jpe(r){let{inputs:t,backend:e,attrs:o}=r,{images:n}=t,{alignCorners:s,halfPixelCenters:a,size:i}=o,[p,u]=i,c=s&&p>1?1:0,l=s&&u>1?1:0,d=[{type:"float32",data:[c,l]},{type:"float32",data:[s?.5:0]}],f=new ry(n.shape,p,u,a);return e.runWebGPUProgram(f,[n],n.dtype,d)}var aU={kernelName:as,backendName:"webgpu",kernelFunc:jpe};var oy=class{constructor(t,e){this.variableNames=["dy"],this.uniforms=`effectiveXSize : vec2<i32>, effectiveYSize : vec2<i32>, invHeightScale : f32, invWidthScale : f32,
2023-08-05 15:03:11 +02:00
winHeight : i32, winWidth : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.alignCorners=e,this.shaderKey=`resizeNearestNeigborBackprop_${e}`}getUserCode(){return`
${G("index")} {
  // Gradient of nearest-neighbour resize. Each invocation owns one output
  // element at (b, r, c, d) and sums every dy element, inside a host-sized
  // search window, whose nearest source pixel is exactly (r, c).
  if (index < uniforms.size) {
    let coords = getOutputCoords();
    let b = coords[0];
    let d = coords[3];
    let r = coords[1];
    let c = coords[2];

    var accumulator = 0.0;

    // Compute bounds for where in dy we will look
    let startRLerp = floor(f32(r) * uniforms.invHeightScale);
    let startDyR = i32(floor(startRLerp - f32(uniforms.winHeight / 2)));

    let startCLerp = floor(f32(c) * uniforms.invWidthScale);
    let startDyC = i32(floor(startCLerp - f32(uniforms.winWidth / 2)));

    // Loop over dy
    for (var dyROffset = 0; dyROffset < uniforms.winHeight; dyROffset++) {
      let dyR = startDyR + dyROffset;

      // Guard against the window exceeding the bounds of dy
      if (dyR < 0 || dyR >= uniforms.dyShape[1]) {
        continue;
      }

      for (var dyCOffset = 0; dyCOffset < uniforms.winWidth; dyCOffset++) {
        let dyC = startDyC + dyCOffset;

        // Guard against the window exceeding the bounds of dy
        if (dyC < 0 || dyC >= uniforms.dyShape[2]) {
          continue;
        }

        // Fractional position in this output that the dy element maps to.
        let sourceFracRow = f32(uniforms.effectiveXSize[0]) *
            (f32(dyR) / f32(uniforms.effectiveYSize[0]));

        let sourceFracCol = f32(uniforms.effectiveXSize[1]) *
            (f32(dyC) / f32(uniforms.effectiveYSize[1]));

        // The rounding expression is chosen on the host from alignCorners
        // (round-half-up versus plain floor); the result is clamped to the
        // last valid row/column.
        let sourceNearestRow =
            i32(min(f32(uniforms.outShape[1] - 1),
            ${this.alignCorners?"floor(sourceFracRow + 0.5)":"floor(sourceFracRow)"}));

        let sourceNearestCol =
            i32(min(f32(uniforms.outShape[2] - 1),
            ${this.alignCorners?"floor(sourceFracCol + 0.5)":"floor(sourceFracCol)"}));

        if (r == sourceNearestRow && c == sourceNearestCol) {
          accumulator += getDy(b, dyR, dyC, d);
        }
      }
    }
    // End loop over dy

    setOutputAtIndex(index, accumulator);
  }
}
`}};function Xpe(r){let{inputs:t,backend:e,attrs:o}=r,{images:n,dy:s}=t,{alignCorners:a}=o,[,i,p]=n.shape,[,u,c]=s.shape,l=[a&&u>1?i-1:i,a&&c>1?p-1:p],m=[a&&u>1?u-1:u,a&&c>1?c-1:c],d=l[0]/m[0],f=l[1]/m[1],h=1/d,g=1/f,x=Math.ceil(h)*2+2,b=Math.ceil(g)*2+2,C=new oy(n.shape,a),S=[{type:"int32",data:l},{type:"int32",data:m},{type:"float32",data:[h]},{type:"float32",data:[g]},{type:"int32",data:[x]},{type:"int32",data:[b]}];return e.runWebGPUProgram(C,[s],s.dtype,S)}var iU={kernelName:Za,backendName:"webgpu",kernelFunc:Xpe};var ny=class{constructor(t){this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms=" axis : vec4<i32>,",this.shaderKey="reverse"}getUserCode(){return`
// Returns coords with every dimension k mirrored to
// xShape[k] - coords[k] - 1 when uniforms.axis[k] == 1; all other dimensions
// are returned unchanged.
//
// Using uniform variables as judging conditions, so the function has
// coherent execution within all threads.
fn getReverseCoords(coords : vec4<i32>) -> vec4<i32> {
  var reverseCoords = coords;
  if (uniforms.axis[0] == 1) {
    reverseCoords[0] = uniforms.xShape[0] - coords[0] - 1;
  }
  if (uniforms.axis[1] == 1) {
    reverseCoords[1] = uniforms.xShape[1] - coords[1] - 1;
  }
  if (uniforms.axis[2] == 1) {
    reverseCoords[2] = uniforms.xShape[2] - coords[2] - 1;
  }
  if (uniforms.axis[3] == 1) {
    reverseCoords[3] = uniforms.xShape[3] - coords[3] - 1;
  }

  return reverseCoords;
}
${G("index")} {
  // One invocation per output element: read the input at the mirrored
  // coordinates and write it to this output index.
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    let reverseCoords = getReverseCoords(coords);
    setOutputAtIndex(index, getX(reverseCoords[0],
        reverseCoords[1], reverseCoords[2], reverseCoords[3]));
  }
}
`}};function Ype(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{dims:s}=o,a=n.shape.length;if(a===0)return At({inputs:{x:n},backend:e});let i=n.shape,p=[1,1,1,1];i.forEach((g,x)=>{let b=x+4-a;p[b]=g});let u=y.parseAxisParam(s,n.shape),c=[0,0,0,0];u.forEach(g=>{let x=g+4-a;c[x]=1});let l=[{type:"int32",data:c}],m=pe({inputs:{x:n},backend:e,attrs:{shape:p}}),d=new ny(p),f=e.runWebGPUProgram(d,[m],m.dtype,l);e.disposeData(m.dataId);let h=pe({inputs:{x:f},backend:e,attrs:{shape:i}});return e.disposeData(f.dataId),h}var uU={kernelName:ps,backendName:"webgpu",kernelFunc:Ype};var sy=class{constructor(t,e){this.outputShape=[],this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms=`centerX : f32, centerY : f32, sinRadians : f32,
2023-08-05 15:03:11 +02:00
cosRadians : f32,`,this.shaderKey="rotate",this.outputShape=t,typeof e=="number"?(this.uniforms+=" fillValue : f32,",this.fillSnippet="var outputValue = uniforms.fillValue;",this.shaderKey+="_float"):(this.uniforms+=" fillValue : vec3<f32>,",this.fillSnippet="var outputValue = uniforms.fillValue[coords[3]];",this.shaderKey+="_vec3")}getUserCode(){return`
${G("index")} {
  // Image rotation by inverse mapping: every output pixel is rotated about
  // (centerX, centerY) to find the input pixel it samples from.
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    // coords[2] is used as x and coords[1] as y.
    let coordXFloat = (f32(coords[2]) - uniforms.centerX) *
        uniforms.cosRadians - (f32(coords[1]) - uniforms.centerY) *
        uniforms.sinRadians;
    let coordYFloat = (f32(coords[2]) - uniforms.centerX) *
        uniforms.sinRadians + (f32(coords[1]) - uniforms.centerY) *
        uniforms.cosRadians;
    let coordX = i32(round(coordXFloat + uniforms.centerX));
    let coordY = i32(round(coordYFloat + uniforms.centerY));
    // Host-selected snippet that declares outputValue and initialises it
    // from uniforms.fillValue (scalar or per-channel form).
    ${this.fillSnippet}
    // Only sample the input when the rotated position lies inside it;
    // otherwise the fill value is kept.
    if(coordX >= 0 && coordX < uniforms.xShape[2] && coordY >= 0 &&
        coordY < uniforms.xShape[1]) {
      outputValue = getX(coords[0], coordY, coordX, coords[3]);
    }
    setOutputAtIndex(index, outputValue);
  }
}
`}};var pU={kernelName:Ds,backendName:"webgpu",kernelFunc:({inputs:r,attrs:t,backend:e})=>{let{image:o}=r,{radians:n,fillValue:s,center:a}=t,i=e,p=new sy(o.shape,s),[u,c]=w.getImageCenter(a,o.shape[1],o.shape[2]),l=[{type:"float32",data:[u]},{type:"float32",data:[c]},{type:"float32",data:[Math.sin(n)]},{type:"float32",data:[Math.cos(n)]}];return typeof s=="number"?l.push({type:"float32",data:[Number.parseFloat(s.toFixed(2))]}):l.push({type:"float32",data:s}),i.runWebGPUProgram(p,[o],o.dtype,l)}};var Qpe=ye({opType:Z.ROUND}),cU={kernelName:cs,backendName:"webgpu",kernelFunc:Qpe};var Zpe=ye({opType:Z.RSQRT,cpuKernelImpl:Ez}),lU={kernelName:ls,backendName:"webgpu",kernelFunc:Zpe};var za=class{constructor(t,e,o,n,s,a,i,p=!0){this.variableNames=["updates","indices"],this.workgroupSize=[64,1,1],this.atomic=!0,this.outputShape=a,this.type=i,this.sumDupeIndices=p,this.dispatchLayout=X(t),this.dispatch=H(this.dispatchLayout,t,this.workgroupSize),this.sliceDimGreaterThanOne=e>1,this.shaderKey=`scatter_${o}_${n}_${this.sliceDimGreaterThanOne}_${i}_${p}_${s.length}`;let u=ft(s.length);this.uniforms=`sliceDim : i32, strides: ${u}, updatesSize: i32,`,this.updatesRank=n,this.indicesRank=o}getUserCode(){let t="";this.indicesRank===1?t="coords[0]":this.indicesRank===2&&(t="coords[0], j");let e=`getIndices(${t})`,o=this.sliceDimGreaterThanOne?"uniforms.strides[j]":"uniforms.strides",n="",s="";this.dispatchLayout.x.length===1?(n="flattenedIndex",s=`
// Variant emitted when the dispatch layout has a single x dimension: the
// flat index already is the coordinate, so it is returned unchanged.
fn getUpdatesCoordsFromFlatIndex(index : i32) -> i32 {
  return index;
}
`):this.dispatchLayout.x.length===2&&(n="vec2<i32>(flattenedIndex, coords[1])",s=`
// Variant emitted when the dispatch layout has two x dimensions: splits the
// flat index into (index / sliceSize, index % sliceSize).
fn getUpdatesCoordsFromFlatIndex(index : i32) -> vec2<i32> {
  // N.B. |updates| could be a scalar tensor, conceptually representing a
  // 2D tensor with all values equal to that. By design, its size must be
  // the same as |outShape[1]| in one dimension, and |indicesShape[0]|
  // gives the other.
  let sliceSize = uniforms.outShape[1];
  let d0 = index / sliceSize;
  // Remainder computed by subtraction instead of the modulo operator.
  let d1 = index - d0 * sliceSize;
  return vec2<i32>(d0, d1);
}
`);let i=`getUpdates(${Array.from({length:this.updatesRank},(u,c)=>`coords[${c}]`).join(", ")})`;return`
2022-11-18 17:13:29 +01:00
${s}
2023-08-05 15:03:11 +02:00
${G("index")} {
2022-11-20 22:20:02 +01:00
if (index < uniforms.updatesSize) {
2022-11-18 17:13:29 +01:00
let coords = getUpdatesCoordsFromFlatIndex(index);
var flattenedIndex = 0;
for (var j = 0; j < uniforms.sliceDim; j = j + 1) {
2023-08-05 15:03:11 +02:00
let indexInside = i32(round(${e}));
2022-11-18 17:13:29 +01:00
flattenedIndex = flattenedIndex + indexInside * ${o};
}
let updateValue =
${Cu(this.type)}(${i});
2022-11-18 17:13:29 +01:00
let flatIndex = getOutputIndexFromCoords(${n});
${this.sumDupeIndices?Yr("&result[flatIndex]","updateValue",this.type):"atomicStore(&result[flatIndex], bitcast<i32>(updateValue));"}
2022-11-18 17:13:29 +01:00
}
}`}};function Jpe(r){let{inputs:t,backend:e,attrs:o}=r,{indices:n,updates:s}=t,{shape:a}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=w.calculateShapes(s,n,a),m=[l/u,u];if(l===0)return e.makeTensorInfo(a,n.dtype);let d=pe({inputs:{x:n},backend:e,attrs:{shape:[p,i]}}),f=pe({inputs:{x:s},backend:e,attrs:{shape:[p,u]}}),h=f.dtype,g=vt({backend:e,attrs:{shape:m,value:0,dtype:h}}),x=y.sizeFromShape(f.shape),b=[{type:"int32",data:[i]},{type:"int32",data:c},{type:"int32",data:[x]}],C=new za(f.shape,i,d.shape.length,f.shape.length,c,m,h),S=e.runWebGPUProgram(C,[f,d],h,b,g),k=pe({inputs:{x:S},backend:e,attrs:{shape:a}});return e.disposeData(d.dataId),e.disposeData(f.dataId),e.disposeData(S.dataId),k}var mU={kernelName:ms,backendName:"webgpu",kernelFunc:Jpe};var ay=class{constructor(t,e){this.outputShape=[],this.variableNames=["sortedSequence","values"],this.uniforms="numInputs : i32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.side=e,this.shaderKey=`search_sorted_${e}`}getUserCode(){return`
// Binary search over getSortedSequence(batch, 0 .. numInputs - 1).
// Returns the first position whose element fails the host-selected
// comparison against value: with "<" that is the first element >= value,
// with "<=" the first element > value. Returns numInputs when every element
// passes the comparison.
// NOTE(review): the search assumes each row is sorted ascending, as the
// accessor name suggests - confirm against the caller.
fn findBound(batch: i32, value: f32) -> i32 {
  var left = i32(0);
  var right = uniforms.numInputs;
  while (left < right) {
    var mid = (left + right) / 2;
    if (getSortedSequence(batch, mid) ${this.side==="left"?"<":"<="} value) {
      left = mid + 1;
    } else {
      right = mid;
    }
  }
  // left == right once the loop ends.
  return right;
}
${G("index")} {
  // One invocation per output element: look up the value that shares this
  // output index and store its insertion position within row coords[0] as
  // an i32.
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    let value = getValuesByOutputIndex(index);
    setOutputAtIndexI32(index, findBound(coords[0], value));
  }
}
`}};function ece(r){let{inputs:t,backend:e,attrs:o}=r,{sortedSequence:n,values:s}=t,{side:a}=o,i=new ay([s.shape[0],s.shape[1]],a),p=[{type:"int32",data:[n.shape[1]]}];return e.runWebGPUProgram(i,[n,s],"int32",p)}var dU={kernelName:fs,backendName:"webgpu",kernelFunc:ece};var iy=class{constructor(t,e,o){this.variableNames=["c","a","b"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.cRank=t,this.rank=o,this.shaderKey="select"}getUserCode(){let t,e;if(this.rank>4)throw Error(`Where for rank ${this.rank} is not yet supported`);if(this.rank===1)e="resRC",t="resRC";else{let n=["resRC.x","resRC.y","resRC.z","resRC.w"],s=[],a=[];for(let i=0;i<this.outputShape.length;i++)a.push(`${n[i]}`),i<this.cRank&&s.push(`${n[i]}`);t=s.join(),e=a.join()}return`
${G("index")} {
  // Element-wise select: writes a where the condition value is >= 1.0 and
  // b otherwise.
  if (index < uniforms.size) {
    let resRC = getCoordsFromIndex(index);
    // The condition is addressed with the leading cRank components of the
    // output coordinates; a and b use all of them. Both argument lists are
    // assembled on the host in getUserCode().
    let cVal = getC(${t});
    if (cVal >= 1.0) {
      setOutputAtIndex(index, getA(${e}));
    } else {
      setOutputAtIndex(index, getB(${e}));
    }
  }
}
`}};function tce(r){let{inputs:t,backend:e}=r,{condition:o,t:n,e:s}=t,a=new iy(o.shape.length,n.shape,n.shape.length);return e.runWebGPUProgram(a,[o,n,s],dt(n.dtype,s.dtype))}var fU={kernelName:fa,backendName:"webgpu",kernelFunc:tce};var rce=ye({opType:Z.SELU}),hU={kernelName:hs,backendName:"webgpu",kernelFunc:rce};var oce=ye({opType:Z.SIGMOID}),gU={kernelName:bs,backendName:"webgpu",kernelFunc:oce};var nce=ye({opType:Z.SIGN}),xU={kernelName:ys,backendName:"webgpu",kernelFunc:nce};var sce=ye({opType:Z.SIN}),yU={kernelName:gs,backendName:"webgpu",kernelFunc:sce};var ace=ye({opType:Z.SINH}),bU={kernelName:xs,backendName:"webgpu",kernelFunc:ace};var ice=ye({opType:Z.SOFTPLUS}),CU={kernelName:Cs,backendName:"webgpu",kernelFunc:ice};var uy=class{constructor(t,e,o,n,s,a){this.variableNames=["x"],this.outputShape=[],this.uniforms="",this.workgroupSize=[64,1,1],this.size=!0;let i=new Array(n.length);for(let p=0;p<i.length;p++)i[p]=n[s[p]];this.outputShape=i,this.newDim=s,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.xShape=t,this.paddedXShape=e,this.uniforms+=`reshapedPaddedXShape : ${ft(n.length)}, paddedXShapeStrides : ${ft(a)}, `,o.map((p,u)=>{this.uniforms+=` pad${u} : vec2<i32>,`}),this.shaderKey=`spaceToBatchND_${s}`}getUserCode(){let t=ft(this.outputShape.length),e=Jv(this.newDim);return`
// Host-generated helper code for the padded input shape.
// NOTE(review): presumably this defines getPaddedXCoordsFromIndex, which is
// called below - confirm against the helper's definition.
${um(this.paddedXShape,"PaddedX")}
${G("index")} {
  if(index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    // Reorder the output coordinates with the host-built permutation and
    // flatten them against the reshaped padded input shape.
    let switchedIndex = getIndexFromCoords${this.outputShape.length}D(${t}(${e}), uniforms.reshapedPaddedXShape);
    // Convert that flat index back into coordinates of the padded input.
    let paddedCoords = getPaddedXCoordsFromIndex(switchedIndex);
    // Host-generated tail of the shader body.
    // NOTE(review): presumably reads x at paddedCoords (handling the padding
    // region) and writes the output - confirm against the helper.
    ${l0(this.xShape,!0)}
  }
}
`}};var uce=r=>{let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{blockShape:s,paddings:a}=o;y.assert(n.shape.length<=4,()=>"spaceToBatchND for rank > 4 with a WebGPU backend not implemented yet");let i=s.reduce((b,C)=>b*C),p=[[0,0]];p.push(...a);for(let b=1+s.length;b<n.shape.length;++b)p.push([0,0]);let u=p.map((b,C)=>b[0]+n.shape[C]+b[1]),c=w.getReshaped(u,s,i,!1),l=w.getPermuted(c.length,s.length,!1),m=w.getReshapedPermuted(u,s,i,!1),d=y.computeStrides(u),f=new uy(n.shape,u,p,c,l,d.length),h=[{type:"int32",data:c},{type:"int32",data:d}];p.map(b=>h.push({type:"int32",data:[b[0],b[1]]}));let g=e.runWebGPUProgram(f,[n],n.dtype,h),x=pe({inputs:{x:g},backend:e,attrs:{shape:m}});return e.disposeData(g.dataId),x},wU={kernelName:ga,backendName:"webgpu",kernelFunc:uce};var py=class{constructor(t,e,o){this.variableNames=["input","indices","segmentIds"],this.outputShape=[],this.uniforms="segmentSize : i32, sparseSize : i32,",this.workgroupSize=[64,1,1],this.atomic=!0,this.outputShape=t,this.type=o,this.dispatchLayout=X([e]),this.dispatch=H(this.dispatchLayout,[e],this.workgroupSize),this.shaderKey="sparseSegmentSum"}getUserCode(){return`
${G("index")} {
  // One invocation per gathered element (sparseSize in total). The input
  // buffers are read directly as flat arrays.
  if (index < uniforms.sparseSize) {
    // Which (indices, segmentIds) entry this element belongs to, and its
    // offset inside a row of segmentSize elements.
    let indexInSegmentIds = index / uniforms.segmentSize;
    let indexInSegment = index % uniforms.segmentSize;
    let indexInInput = indices[indexInSegmentIds];
    let segmentId = segmentIds[indexInSegmentIds];

    let value = input[indexInInput * uniforms.segmentSize + indexInSegment];
    let outIndex = segmentId * uniforms.segmentSize + indexInSegment;
    // Host-generated accumulation of value into result[outIndex].
    // NOTE(review): presumably an atomic add, since several invocations can
    // target the same outIndex - confirm against the helper.
    ${Yr("&result[outIndex]","value",this.type)}
  }
}
`}},cy=class{constructor(t,e){this.variableNames=["segmentIds"],this.outputShape=[],this.workgroupSize=[64,1,1],this.atomic=!0,this.outputShape=[t],this.dispatchLayout=X(e),this.dispatch=H(this.dispatchLayout,e,this.workgroupSize),this.shaderKey="sparseSegmentIdCountProgram"}getUserCode(){return`
${G("index")} {
  // One invocation per entry of segmentIds: bump the counter of the segment
  // that entry refers to, so result[s] ends up holding how many entries map
  // to segment s.
  if (index < uniforms.segmentIdsShape) {
    let segmentId = segmentIds[index];
    // Host-generated accumulation of 1 into result[segmentId].
    // NOTE(review): presumably an atomic add - confirm against the helper.
    ${Yr("&result[segmentId]","1","int32")}
  }
}
`}},ly=class{constructor(t,e){this.variableNames=["segmentSum","sameSegmentIdCount"],this.outputShape=[],this.uniforms="segmentSize : i32",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.type=e,this.dispatchLayout=X(t),this.dispatch=H(this.dispatchLayout,t,this.workgroupSize),this.shaderKey="sparseSegmentMean"}getUserCode(){return`
${G("index")} {
  // Turns per-segment sums into means: each output element is divided by
  // the number of entries that were accumulated into its segment.
  if (index < uniforms.size) {
    let segmentId = index / uniforms.segmentSize;
    let count = sameSegmentIdCount[segmentId];
    // Segments with no entries are skipped, which avoids a division by zero
    // and leaves the output element as the host initialised it.
    if (count != 0) {
      ${this.type==="float32"?"setOutputAtIndex(index, segmentSum[index] / f32(count));":"setOutputAtIndexI32(index, segmentSum[index] / count);"}
    }
  }
}
`}};function my(r,t,e,o=!1,n){let a=y.sizeFromShape(r.shape)/r.shape[0],i=r.dtype,p=y.sizeFromShape(t.shape),u=n.readSync(e.dataId),l=p>0?u[p-1]+1:0,m,d=r.shape.slice();d[0]=l;let f=p*a,h=vt({backend:n,attrs:{shape:d,value:0,dtype:i}});m=new py(d,f,i);let g=[{type:"int32",data:[a]},{type:"int32",data:[f]}],x=n.runWebGPUProgram(m,[r,t,e],i,g,h);if(o)return x;let b=vt({backend:n,attrs:{shape:[l],value:0,dtype:"int32"}});m=new cy(l,e.shape);let C=n.runWebGPUProgram(m,[e],"int32",null,b),S=vt({backend:n,attrs:{shape:d,value:0,dtype:i}});m=new ly(d,i),g=[{type:"int32",data:[a]}];let k=n.runWebGPUProgram(m,[x,C],i,g,S);return n.disposeData(x.dataId),n.disposeData(C.dataId),k}function pce(r){let{inputs:t,backend:e}=r,{data:o,indices:n,segmentIds:s}=t;return my(o,n,s,!1,e)}var SU={kernelName:ya,backendName:"webgpu",kernelFunc:pce};function cce(r){let{inputs:t,backend:e}=r,{data:o,indices:n,segmentIds:s}=t;return my(o,n,s,!0,e)}var IU={kernelName:ba,backendName:"webgpu",kernelFunc:cce};var dy=class{constructor(t,e){this.variableNames=["A"],this.workgroupSize=[64,1,1],this.size=!0;let o=new Array(t.length);for(let n=0;n<o.length;n++)o[n]=t[n]*e[n];this.outputShape=o,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.rank=this.outputShape.length,this.shaderKey="tile"}getUserCode(){let t=lce(this.rank,"uniforms.");return`
${G("index")} {
  // One invocation per output element. The interpolated argument list wraps
  // every output coordinate modulo the matching input extent, so the input
  // repeats along each dimension.
  if (index < uniforms.size) {
    let resRC = getCoordsFromIndex(index);
    setOutputAtIndex(index, getA(${t}));
  }
}
`}};function lce(r,t=""){if(r>=5)throw Error(`Tile for rank ${r} is not yet supported`);if(r===1)return`(resRC % ${t}aShape)`;let e=["resRC.x","resRC.y","resRC.z","resRC.w"],o=[];for(let n=0;n<r;n++)o.push(`(${e[n]} % ${t}aShape[${n}])`);return o.join()}function ym(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{reps:s}=o;if(e.shouldExecuteOnCPU([n])||n.dtype==="string"||n.shape.length>=5){let p=e.readSync(n.dataId),u=n.dtype==="string"?p.map(m=>y.decodeString(m)):p,c=me(n.shape,n.dtype,u),l=Mz(c,s);return e.makeTensorInfo(l.shape,l.dtype,l.values)}let a=new dy(n.shape,s);return e.runWebGPUProgram(a,[n],n.dtype)}var vU={kernelName:uo,backendName:"webgpu",kernelFunc:ym};function mce(r){let{inputs:t,backend:e,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=t,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=w.calculateShapes(s,n,i),d=!1;if(s.dtype==="string"){let R=e.bufferSync(n),D=e.bufferSync(s),P=y.decodeString(e.readSync(a.dataId)[0]),O=Rz(R,D,i,m,c,u,p,l,P,d);return e.makeTensorInfo(i,O.dtype,O.values)}let f=[m/c,c],h=pe({inputs:{x:n},backend:e,attrs:{shape:[u,p]}}),g=s.shape.length?pe({inputs:{x:s},backend:e,attrs:{shape:[u,c]}}):At({inputs:{x:s},backend:e}),x=g.dtype,b=e.makeTensorInfo([],x,y.makeZerosTypedArray(1,x)),C=pe({inputs:{x:a},backend:e,attrs:{shape:Array(f.length).fill(1)}}),S=ym({inputs:{x:C},backend:e,attrs:{reps:f}}),k=y.sizeFromShape([u,c]),_=[{type:"int32",data:[p]},{type:"int32",data:l},{type:"int32",data:[k]}];switch(u){case 0:break;case 1:{let R=new za([u,c],p,h.shape.length,g.shape.length,l,f,x,d);e.runWebGPUProgram(R,[g,h],x,_,S)}break;default:{let R=new za([u,c],p,h.shape.length,b.shape.length,l,f,x,d);e.runWebGPUProgram(R,[b,h],x,_,S)}{let R=new za([u,c],p,h.shape.length,g.shape.length,l,f,x);e.runWebGPUProgram(R,[g,h],x,_,S)}}let E=pe({inputs:{x:S},backend:e,attrs:{shape:i}});return 
e.disposeData(h.dataId),e.disposeData(g.dataId),e.disposeData(C.dataId),e.disposeData(b.dataId),e.disposeData(S.dataId),E}var kU={kernelName:vs,backendName:"webgpu",kernelFunc:mce};function dce(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{numOrSizeSplits:s,axis:a}=o,i=y.parseAxisParam(a,n.shape)[0],p=w.prepareSplitSize(n,s,i),u=n.shape.length,c=new Array(u).fill(0),l=n.shape.slice();return p.map(m=>{let d=[...l];d[i]=m;let f=Hs({inputs:{x:n},backend:e,attrs:{begin:c,size:d}});return c[i]+=m,f})}var NU={kernelName:xa,backendName:"webgpu",kernelFunc:dce};var fce=ye({opType:Z.SQRT}),TU={kernelName:ws,backendName:"webgpu",kernelFunc:fce};var _U={kernelName:Ki,backendName:"webgpu",kernelFunc:({inputs:r,backend:t})=>{let{x:e}=r,o=t,n=new Zr(e.shape,Z.SQUARE);return o.runWebGPUProgram(n,[e],e.dtype)}};var hce=et({opType:fe.SQUARED_DIFFERENCE}),$U={kernelName:ks,backendName:"webgpu",kernelFunc:hce};function gce({inputs:r,attrs:t,backend:e}){let{x:o}=r,n=new Zr(o.shape,Z.STEP,"stepAlpha : f32,"),s=[{type:"float32",data:[t.alpha]}];return e.runWebGPUProgram(n,[o],o.dtype,s)}var EU={kernelName:wo,backendName:"webgpu",kernelFunc:gce};var fy=class{constructor(t){this.variableNames=["x"],this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]);let e=ft(this.outputShape.length);this.uniforms=`begin : ${e}, strides : ${e}, `,this.shaderKey="stridedSlice"}getUserCode(){let t=this.outputShape.length,e="";if(t===1)e="coords * uniforms.strides + uniforms.begin";else{let n=0;e=this.outputShape.map((s,a)=>(n++,this.outputShape.length===1?`coords * uniforms.strides[${a}] + uniforms.begin[${a}]`:`coords[${n-1}] * uniforms.strides[${a}] + uniforms.begin[${a}]`)).join(",")}return`
${G("index")} {
  // One invocation per output element. The interpolated argument list maps
  // each output coordinate to the input coordinate
  // coordinate * stride + begin, per dimension.
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    setOutputAtIndex(index, getX(${e}));
  }
}
`}};function xce(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:d,finalShape:f,isIdentity:h,sliceDim0:g,isSimpleSlice:x,begin:b,end:C,strides:S}=pt.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=pe({inputs:{x:n},backend:e,attrs:{shape:f}});else if(g||x){y.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let _=pt.computeOutShape(b,C,S),E=Hs({inputs:{x:n},backend:e,attrs:{begin:b,size:_}});k=pe({inputs:{x:E},backend:e,attrs:{shape:f}}),e.disposeData(E.dataId)}else if(e.shouldExecuteOnCPU([n])){let E=e.readSync(n.dataId),R=me(n.shape,n.dtype,E),D=Fz(d,R,S,b);k=e.makeTensorInfo(f,n.dtype,D.values)}else{let E=new fy(d),R=[{type:"int32",data:b},{type:"int32",data:S}],D=e.runWebGPUProgram(E,[n],n.dtype,R);k=pe({inputs:{x:D},backend:e,attrs:{shape:f}}),e.disposeData(D.dataId)}return k}var RU={kernelName:Ns,backendName:"webgpu",kernelFunc:xce};function yce(r){let{inputs:t,backend:e,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=t,m=e.readSync(c.dataId),d=e.readSync(l.dataId),[f,h]=Pz(m,d,n,s,a,i,p,u);return[e.makeTensorInfo([f.length],"string",f),e.makeTensorInfo(l.shape,"int32",h)]}var DU={kernelName:Ca,backendName:"webgpu",kernelFunc:yce};var bce=et({opType:fe.SUB,cpuKernelImpl:Oz,supportsComplex:!0}),AU={kernelName:Ts,backendName:"webgpu",kernelFunc:bce};var Cce=ye({opType:Z.TAN}),FU={kernelName:_s,backendName:"webgpu",kernelFunc:Cce};var wce=ye({opType:Z.TANH}),PU={kernelName:$s,backendName:"webgpu",kernelFunc:wce};function Sce(r){let{inputs:t,backend:e,attrs:o}=r,{tensor:n,indices:s,updates:a}=t,{}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=w.calculateShapes(a,s,n.shape),m=[l/u,u];if(l===0)return e.makeTensorInfo(n.shape,s.dtype);let d=[],f=pe({inputs:{x:s},backend:e,attrs:{shape:[p,i]}});d.push(f);let 
h=pe({inputs:{x:a},backend:e,attrs:{shape:[p,u]}});d.push(h);let g=pe({inputs:{x:n},backend:e,attrs:{shape:m}});d.push(g);let x=ym({inputs:{x:g},backend:e,attrs:{reps:Array(m.length).fill(1)}}),b=new za([p,u],i,f.shape.length,h.shape.length,c,m,n.dtype,!1),C=y.sizeFromShape([p,u]),S=[{type:"int32",data:[i]},{type:"int32",data:c},{type:"int32",data:[C]}],k=e.runWebGPUProgram(b,[h,f],g.dtype,S,x);d.push(k);let _=pe({inputs:{x:k},backend:e,attrs:{shape:n.shape}});return d.forEach(E=>e.disposeData(E.dataId)),_}var OU={kernelName:ds,backendName:"webgpu",kernelFunc:Sce};var hy=class{constructor(t){this.variableNames=["x","indices"],this.workgroupSize=[256,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms=`inputSize : i32, firstPass : i32, negativeInf : f32,
2022-11-18 17:13:29 +01:00
dir : i32, inc : i32,`,this.shaderKey="swap"}getUserCode(){return`
${G("index")} {
  // One compare-and-swap step over element indices. Each invocation decides
  // which index of its pair ends up at position elemIdx and writes that
  // index (stored as f32) to the output.
  if (index < uniforms.size) {
    let outC = getCoordsFromIndex(index);
    let batch = outC[0];
    let elemIdx = outC[1];

    // We compare elements pair-wise within a group of size 2 * inc.
    // The comparing rule for each group alternates between ascending
    // and descending. Within each group, we compare each pair at
    // positions i and i+inc. To decide whether an element at position i
    // is x0 or x1, we mod it by 2 * inc, if the result is smaller than
    // inc, it is in the first half of the group, we denote it as x0,
    // otherwise we denote it as x1.
    // For example, as shown in the Bitonic top K paper referenced
    // above, Figure5(a) shows that element[1] is in the second half of
    // the group when group size is 2, but it is in the first half of
    // the group when group size is 4.
    let isFirstInPair = elemIdx % (2 * uniforms.inc) < uniforms.inc;

    // i is the position of the first member of this invocation's pair.
    var i = 0;
    if (isFirstInPair) {
      i = elemIdx;
    } else {
      i = elemIdx - uniforms.inc;
    }

    // On the first pass positions are used as indices directly; on later
    // passes the indices written by the previous pass are read back.
    var i0 = 0;
    if (uniforms.firstPass == 1) {
      i0 = i;
    } else {
      i0 = i32(getIndices(batch, i));
    }

    var i1 = 0;
    if (uniforms.firstPass == 1) {
      i1 = i + uniforms.inc;
    } else {
      i1 = i32(getIndices(batch, i + uniforms.inc));
    }

    // Indices at or beyond inputSize have no element behind them; they
    // compare as uniforms.negativeInf.
    var x0 = f32(0.0);
    var x1 = f32(0.0);
    if (i0 < uniforms.inputSize) {
      x0 = getX(batch, i0);
    } else {
      x0 = uniforms.negativeInf;
    }

    if (i1 < uniforms.inputSize) {
      x1 = getX(batch, i1);
    } else {
      x1 = uniforms.negativeInf;
    }

    // Equal values are ordered by their indices so the result is
    // deterministic.
    let reverse = elemIdx % (2 * uniforms.dir) >= uniforms.dir;
    let isGreater = x0 > x1 || (x0 == x1 && i1 > i0);
    if (reverse == isGreater) {
      // Elements in opposite order of direction
      let iTemp = i0;
      i0 = i1;
      i1 = iTemp;
    }

    if (isFirstInPair) {
      setOutputAtIndex(index, f32(i0));
    } else {
      setOutputAtIndex(index, f32(i1));
    }
  }
}
`}},gy=class{constructor(t){this.variableNames=["x","indices"],this.workgroupSize=[256,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms="inputSize : i32, firstPass : i32, k : i32,",this.shaderKey="merge"}getUserCode(){return`
// Merge pass: each invocation handles one (batch, elemIdx) slot of an output
// that is half the size of the previous sequence. It compares the candidates
// at positions i and i + k and writes the index of the larger value as f32.
${G("index")} {
  if (index < uniforms.size) {
    let outC = getCoordsFromIndex(index);
    let batch = outC[0];
    let elemIdx = outC[1];
    // The output size is half of the previous size.
    // If the previous sequence is | | | | _ _ _ _ | | | | _ _ _ _
    // (k=4), we only need to output the indices at positions |, the
    // indices at positions _ can be thrown away, see Figure5(b) After
    // Phase 2 (Merge phase) in the Bitonic Top K paper referenced
    // above.
    // For example, the paper shows we only need to output the orange
    // bars. The output sequence should look like this | | | | | | | |.
    // Because the sequence is halved, to map the output index back to
    // the previous sequence to find the corresponding value, we need
    // to double the index. When we double the index, we basically
    // interpolate a position, so 2i looks like
    // | _ | _ | _ | _ | _ | _ | _. We move the | to the first k
    // position of each 2k positions by - elemIdx % k. E.g. for output
    // at index 4,5,6,7, we want to get the corresponding element at
    // original index 8,9,10,11, for output at index 8,9,10,11,
    // we want to get the corresponding element at original index
    // 16,17,18,19, so on and so forth.
    var i = 0;
    if (elemIdx < uniforms.k) {
      i = elemIdx;
    } else {
      i = elemIdx * 2 - elemIdx % uniforms.k;
    }
    // On the first pass there is no indices buffer yet, so positions are
    // used directly as indices.
    var i0 = 0;
    if (uniforms.firstPass == 1) {
      i0 = i;
    } else {
      i0 = i32(getIndices(batch, i));
    }
    var i1 = 0;
    if (uniforms.firstPass == 1) {
      i1 = i + uniforms.k;
    } else {
      i1 = i32(getIndices(batch, i + uniforms.k));
    }
    let x0 = getX(batch, i0);
    // An out-of-range partner is given x0's value, so x0 >= x1 holds and
    // i0 is kept.
    var x1 = f32(0.0);
    if (i1 < uniforms.inputSize) {
      x1 = getX(batch, i1);
    } else {
      x1 = x0;
    }
    if (x0 >= x1) {
      setOutputAtIndex(index, f32(i0));
    } else {
      setOutputAtIndex(index, f32(i1));
    }
  }
}
`}};function el(r,t){t!==null&&r.disposeData(t.dataId)}function MU(r){let t=1;for(;t<r;)t*=2;return t}function Ice(r){let{inputs:t,backend:e,attrs:o}=r,{x:n}=t,{k:s,sorted:a}=o,i=n.shape,p=i[i.length-1];if(e.shouldExecuteOnCPU([n])){let k=e.readSync(n.dataId),[_,E]=Lz(k,i,n.dtype,s,a);return[e.makeTensorInfo(_.shape,_.dtype,_.values),e.makeTensorInfo(E.shape,E.dtype,E.values)]}if(s===0)return i[i.length-1]=0,[e.makeTensorInfo(i,n.dtype,[]),e.makeTensorInfo(i,"int32",[])];if(p===1)return[n,vt({attrs:{shape:i,dtype:"int32",value:0},backend:e})];let c=y.sizeFromShape(i)/p,l=pe({inputs:{x:n},attrs:{shape:[c,p]},backend:e}),m=MU(s),d=MU(p),f=null,h=()=>f===null?[l,l]:[l,f],g=(k,_,E)=>{let R=h(),D=new hy(E),O=[{type:"int32",data:[p]},{type:"int32",data:[f===null?1:0]},{type:"float32",data:[Number.NEGATIVE_INFINITY]},{type:"int32",data:[k]},{type:"int32",data:[_]}],M=f;f=e.runWebGPUProgram(D,R,"int32",O),el(e,M)};for(let k=1;k<m;k*=2){let _=k*2;for(let E=k;E>=1;E/=2)g(_,E,[c,d])}for(let k=d;k>m;k/=2){let _=h(),E=new gy([c,k/2]),D=[{type:"int32",data:[p]},{type:"int32",data:[f===null?1:0]},{type:"int32",data:[m]}],P=f;f=e.runWebGPUProgram(E,_,"int32",D),el(e,P);let O=m/2,M=O*2;for(let L=O;L>=1;L/=2)g(M,L,f.shape)}let x=f;f=Hs({inputs:{x:f},backend:e,attrs:{begin:0,size:[c,s]}}),el(e,x);let b=p0({inputs:{x:l,indices:f},backend:e,attrs:{axis:1,batchDims:1}});el(e,l);let C=i.slice(0,-1);C.push(s),x=f,f=pe({inputs:{x:f},attrs:{shape:C},backend:e}),el(e,x);let S=b;return b=pe({inputs:{x:b},attrs:{shape:C},backend:e}),el(e,S),[b,f]}var LU={kernelName:Es,backendName:"webgpu",kernelFunc:Ice};var xy=class{constructor(t){this.variableNames=["Image","Transforms"],this.uniforms="interpolationModeId : i32, fillModeId : i32, fillValue : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=X(this.outputShape),this.dispatch=H(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="transform"}getUserCode(){return`
// Maps a source coordinate into the range of a dimension of length len
// according to uniforms.fillModeId. The transform kernel in this file assigns
// the ids as: 1 = constant, 2 = reflect, 3 = wrap, 4 = nearest.
// Any other id (including 1) returns the coordinate unchanged, so
// out-of-range reads are left to the caller's fill-value handling.
fn mapCoord(outCoord : f32, len : f32) -> f32{
  var inCoord = outCoord;
  if(uniforms.fillModeId == 2) {
    // Reflect.
    if (inCoord < 0.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        let sz2 = 2.0 * len;
        if (inCoord < sz2) {
          // f32(i32(...)) truncates the quotient toward zero.
          inCoord = sz2 * f32(i32(f32(-inCoord / sz2))) +
          inCoord;
        }
        if (inCoord < -len) {
          inCoord = inCoord + sz2;
        } else {
          inCoord = -inCoord - 1.0;
        }
      }
    } else if (inCoord > len - 1.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        let sz2 = 2.0 * len;
        inCoord = inCoord - sz2 * f32(i32(f32(inCoord / sz2)));
        if (inCoord >= len) {
          inCoord = sz2 - inCoord - 1.0;
        }
      }
    }
    return clamp(inCoord, 0.0, len - 1.0);
  } else if (uniforms.fillModeId == 3) {
    // Wrap.
    if (inCoord < 0.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        let sz = len - 1.0;
        inCoord = inCoord + len * (f32(i32(f32(-inCoord / sz))) + 1.0);
      }
    } else if (inCoord > len - 1.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        let sz = len - 1.0;
        inCoord = inCoord - len * f32(i32(f32(inCoord / sz)));
      }
    }
    return clamp(inCoord, 0.0, len - 1.0);
  } else if (uniforms.fillModeId == 4) {
    // Nearest: clamp to the valid range.
    return clamp(outCoord, 0.0, len - 1.0);
  }
  return outCoord;
}
// Reads Image at (batch, coordY, coordX, channel). If coordY or coordX lies
// outside [0, imageShape[1]) / [0, imageShape[2]) respectively, returns
// uniforms.fillValue instead of sampling the image.
fn readWithFillValue(batch : i32, coordY : i32, coordX : i32,
channel : i32) -> f32 {
var outputValue : f32;
// Bounds check on the two spatial axes only; batch and channel are used as-is.
if (0 <= coordY && coordY < uniforms.imageShape[1] && 0 <= coordX && coordX < uniforms.imageShape[2]) {
outputValue = getImage(batch, coordY, coordX, channel);
} else {
outputValue = uniforms.fillValue;
}
return outputValue;
}
// Transform entry point: each invocation computes one output element
// (batch, y, x, channel). It applies the per-batch 8-parameter projective
// transform to the output position and samples Image there.
${G("index")} {
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    var outputValue : f32;
    let batch = coords[0];
    let x = coords[2];
    let y = coords[1];
    let channel = coords[3];
    let xf = f32(x);
    let yf = f32(y);
    let a1 = getTransforms(batch, 0);
    let a2 = getTransforms(batch, 1);
    let a3 = getTransforms(batch, 2);
    let b1 = getTransforms(batch, 3);
    let b2 = getTransforms(batch, 4);
    let b3 = getTransforms(batch, 5);
    let c1 = getTransforms(batch, 6);
    let c2 = getTransforms(batch, 7);
    let projection = c1 * xf + c2 * yf + 1.0;
    if (projection == 0.0) {
      // Avoid dividing by zero; use the fill value instead.
      outputValue = uniforms.fillValue;
    } else {
      let inX = (a1 * xf + a2 * yf + a3) / projection;
      let inY = (b1 * xf + b2 * yf + b3) / projection;
      let mapX = mapCoord(inX, f32(uniforms.imageShape[2]));
      let mapY = mapCoord(inY, f32(uniforms.imageShape[1]));
      if (uniforms.interpolationModeId == 1) {
        // Mode 1: nearest neighbour.
        let coordY = i32(round(mapY));
        let coordX = i32(round(mapX));
        outputValue = readWithFillValue(batch, coordY, coordX,
          channel);
      } else {
        // Any other mode: bilinear blend of the four surrounding pixels.
        let yFloor = floor(mapY);
        let xFloor = floor(mapX);
        let yCeil = yFloor + 1.0;
        let xCeil = xFloor + 1.0;
        let valueYFloor = (xCeil - mapX) *
          readWithFillValue(batch, i32(yFloor), i32(xFloor), channel) +
          (mapX - xFloor) *
          readWithFillValue(batch, i32(yFloor), i32(xCeil), channel);
        let valueYCeil = (xCeil - mapX) *
          readWithFillValue(batch, i32(yCeil), i32(xFloor), channel) +
          (mapX - xFloor) *
          readWithFillValue(batch, i32(yCeil), i32(xCeil), channel);
        outputValue = (yCeil - mapY) * valueYFloor +
          (mapY - yFloor) * valueYCeil;
      }
    }
    setOutputAtIndex(index, outputValue);
  }
}
`}};function vce(r){let{inputs:t,backend:e,attrs:o}=r,{image:n,transforms:s}=t,{interpolation:a,fillMode:i,fillValue:p,outputShape:u}=o,[c,l,m,d]=n.shape,[f,h]=u!=null?u:[l,m],g=[c,f,h,d],x=new xy(g),b=a==="nearest"?1:2,C;switch(i){case"constant":C=1;break;case"reflect":C=2;break;case"wrap":C=3;break;case"nearest":C=4;break;default:C=1;break}let S=[{type:"int32",data:[b]},{type:"int32",data:[C]},{type:"float32",data:[p]}];return e.runWebGPUProgram(x,[n,s],"float32",S)}var BU={kernelName:Rs,backendName:"webgpu",kernelFunc:vce};function kce(r){let{inputs:t,backend:e,attrs:o}=r,{value:n}=t,{axis:s}=o;s<0&&(s+=n.shape.length);let a=n,i=a.shape.length,p=n.shape[s],u=new Array(i-1),c=0;for(let h=0;h<i;h++)h!==s&&(u[c++]=a.shape[h]);let l=[],m=new Array(i).fill(0),d=a.shape.slice();d[s]=1;let f=new Array(p);for(let h=0;h<f.length;h++){m[s]=h;let g=Hs({inputs:{x:a},backend:e,attrs:{begin:m,size:d}}),x=pe({inputs:{x:g},backend:e,attrs:{shape:u}});f[h]=x,l.push(g)}return l.forEach(h=>e.disposeData(h.dataId)),f}var zU={kernelName:wa,backendName:"webgpu",kernelFunc:kce};var yy=class{constructor(t,e,o){if(this.outputShape=[],this.variableNames=["x","segmentIds"],this.uniforms="numSegments : i32, xSize: i32,",this.workgroupSize=[64,1,1],this.atomic=!0,this.outputShape=e,this.dispatchLayout=X(t),this.dispatch=H(this.dispatchLayout,t,this.workgroupSize),o!=="float32"&&o!=="int32")throw new Error(`UnsortedSegmentSum only supports float32 and int32
2023-05-08 15:12:41 +02:00
types, does not support ${o} type.`);this.type=o,this.shaderKey="unsortedSegmentSum"}getUserCode(){return`
// Unsorted segment sum: each invocation handles one input element (b, inCol)
// and accumulates it into result[b, segmentId].
${G("index")} {
  if (index < uniforms.xSize) {
    let coords = getXCoordsFromIndex(index);
    let b = coords[0];
    let inCol = coords[1];
    let segmentId = i32(getSegmentIds(inCol));
    // Negative segment ids are skipped.
    if (segmentId >= 0) {
      let flatIndex = b * uniforms.numSegments + segmentId % uniforms.numSegments;
      let value = getX(b, inCol);
      ${Yr("&result[flatIndex]","value",this.type)}
    }
  }
}
`}};function Nce(r){let{inputs:t,backend:e,attrs:o}=r,{x:n,segmentIds:s}=t,{numSegments:a}=o,i=n.shape.length,p=[],u=0,c=w.getAxesPermutation([u],i),l=n;c!=null&&(l=xr({inputs:{x:n},backend:e,attrs:{perm:c}}),p.push(l),u=w.getInnerMostAxes(1,i)[0]);let m=w.segment_util.computeOutShape(l.shape,u,a),d=y.sizeFromShape([l.shape[u]]),f=pe({inputs:{x:l},backend:e,attrs:{shape:[-1,d]}});p.push(f);let h=n.dtype,g=[f.shape[0],a],x=vt({backend:e,attrs:{shape:g,value:0,dtype:h}}),b=new yy(f.shape,g,h),C=[{type:"int32",data:[a]},{type:"int32",data:[y.sizeFromShape(f.shape)]}],S=e.runWebGPUProgram(b,[f,s],h,C,x),k=pe({inputs:{x:S},backend:e,attrs:{shape:m}});p.push(S);let _=k;if(c!=null){p.push(k);let E=w.getUndoAxesPermutation(c);_=xr({inputs:{x:_},backend:e,attrs:{perm:E}})}return p.forEach(E=>e.disposeData(E.dataId)),_}var VU={kernelName:Yi,backendName:"webgpu",kernelFunc:Nce};var Tce=[oz,zz,Vz,Wz,Uz,Gz,Kz,qz,jz,Xz,Yz,Qz,Zz,Jz,eV,oV,nV,sV,aV,iV,pV,cV,lV,hV,gV,xV,sz,bV,wV,SV,IV,vV,kV,NV,TV,_V,$V,EV,AV,FV,PV,OV,LV,BV,MV,zV,VV,WV,UV,GV,qV,jV,XV,YV,QV,ZV,JV,eW,tW,tz,rW,sW,oW,nW,aW,iW,uW,pW,cW,lW,mW,nz,dW,CV,fW,hW,gW,xW,yW,bW,CW,SW,wW,IW,vW,kW,TW,_W,tV,$W,EW,AW,RW,DW,FW,rV,PW,OW,MW,LW,zW,HV,VW,WW,UW,mV,GW,qW,jW,XW,YW,QW,ZW,JW,dV,eU,tU,rU,oU,rz,nU,sU,aU,iU,uU,pU,cU,lU,mU,dU,fU,hU,gU,xU,yU,bU,uV,EU,RU,DU,BW,CU,wU,SU,IU,kU,NU,TU,_U,$U,AU,KV,FU,PU,OU,vU,LU,BU,Hz,zU,VU,HW];for(let r of Tce)ti(r);var WU="4.14.0",_ce="4.14.0",$ce="4.14.0",Ece="4.14.0",Rce="4.14.0",Dce="4.14.0",Ace={tfjs:WU,"tfjs-core":WU,"tfjs-converter":_ce,"tfjs-backend-cpu":$ce,"tfjs-backend-webgl":Ece,"tfjs-backend-wasm":Rce,"tfjs-backend-webgpu":Dce};var bQt=void 0;export{Xs as Abs,Vo as Acos,Wo as Acosh,Qu as AdadeltaOptimizer,Zu as AdagradOptimizer,Ju as AdamOptimizer,ep as AdamaxOptimizer,io as Add,Uo as AddN,Go as All,Ho as Any,Ys as ArgMax,Qs as ArgMin,Ko as Asin,qo as Asinh,jo as Atan,Yo as Atan2,Xo as Atanh,Qo as AvgPool,Zs as AvgPool3D,Ei as AvgPool3DGrad,$i as AvgPoolGrad,im as BackendWasm,Zo as 
BatchMatMul,Js as BatchToSpaceND,Jo as Bincount,qa as BitwiseAnd,ea as BroadcastArgs,Bce as BroadcastTo,yo as Cast,en as Ceil,bo as ClipByValue,Ri as Complex,Di as ComplexAbs,ta as Concat,tn as Conv2D,Ai as Conv2DBackpropFilter,rn as Conv2DBackpropInput,on as Conv3D,ja as Conv3DBackpropFilterV2,nn as Conv3DBackpropInputV2,sn as Cos,an as Cosh,cn as CropAndResize,un as Cumprod,pn as Cumsum,Bo as DataStorage,ra as DenseBincount,ln as DepthToSpace,mn as DepthwiseConv2dNative,Fi as DepthwiseConv2dNativeBackpropFilter,Pi as DepthwiseConv2dNativeBackpropInput,oa as Diag,dn as Dilation2D,Mi as Dilation2DBackpropFilter,Oi as Dilation2DBackpropInput,_u as Draw,ow as ENV,Li as Einsum,hn as Elu,Xa as EluGrad,ll as Environment,xn as Equal,gn as Erf,yn as Exp,na as ExpandDims,bn as Expm1,Bi as FFT,sa as Fill,Cn as FlipLeftRight,wn as Floor,Sn as FloorDiv,Eu as FromPixels,In as FusedBatchNorm,Io as FusedConv2D,vo as FusedDepthwiseConv2D,xp as GPGPUContext,vn as GatherNd,aa as GatherV2,Ml as GraphModel,kn as Greater,Nn as GreaterEqual,zi as IFFT,Co as Identity,Vi as Imag,Tn as IsFinite,_n as IsInf,$n as IsNan,so as KernelBackend,Bn as LRN,Ya as LRNGrad,En as LeakyRelu,Rn as Less,Dn as LessEqual,An as LinSpace,Fn as Log,Pn as Log1p,zce as LogSoftmax,On as LogicalAnd,Mn as LogicalNot,Ln as LogicalOr,E0 as LogicalXor,Vce as LowerBound,hc as MathBackendCPU,Oc as MathBackendWebGL,Wce as MatrixBandPart,zn as Max,Wn as MaxPool,ia as MaxPool3D,Ui as MaxPool3DGrad,Wi as MaxPoolGrad,ua as MaxPoolWithArgmax,Vn as Maximum,Un as Mean,Gn as Min,Hn as Minimum,Kn as MirrorPad,qn as Mod,tp as MomentumOptimizer,jn as Multinomial,Xn as Multiply,pa as Neg,Qn as NonMaxSuppressionV3,Qa as NonMaxSuppressionV4,Zn as NonMaxSuppressionV5,Yn as NotEqual,kw as OP_SCOPE_SUFFIX,Jn as OneHot,ca as OnesLike,kr as Optimizer,Dl as OptimizerConstructors,la as Pack,es as PadV2,Uce as Pool,ts as Pow,rs as Prelu,os as Prod,rp as RMSPropOptimizer,Up as RaggedGather,Gp as RaggedRange,Hp as RaggedTensorToTensor,ma as 
Range,hw as Rank,Gi as Real,fn as RealDiv,ns as Reciprocal,Et as Reduction,ss as Relu,us as Relu6,da as Res