human/dist/tfjs.esm.js

7084 lines
1.1 MiB
JavaScript
Raw Normal View History

2022-11-18 17:13:29 +01:00
/*
Human
homepage: <https://github.com/vladmandic/human>
author: <https://github.com/vladmandic>
*/
var QV=Object.create;var fb=Object.defineProperty;var ZV=Object.getOwnPropertyDescriptor;var JV=Object.getOwnPropertyNames;var ez=Object.getPrototypeOf,tz=Object.prototype.hasOwnProperty;var Em=(r=>typeof require!="undefined"?require:typeof Proxy!="undefined"?new Proxy(r,{get:(e,t)=>(typeof require!="undefined"?require:e)[t]}):r)(function(r){if(typeof require!="undefined")return require.apply(this,arguments);throw new Error('Dynamic require of "'+r+'" is not supported')});var Kt=(r,e)=>()=>(e||r((e={exports:{}}).exports,e),e.exports),Be=(r,e)=>{for(var t in e)fb(r,t,{get:e[t],enumerable:!0})},rz=(r,e,t,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let n of JV(e))!tz.call(r,n)&&n!==t&&fb(r,n,{get:()=>e[n],enumerable:!(o=ZV(e,n))||o.enumerable});return r};var rp=(r,e,t)=>(t=r!=null?QV(ez(r)):{},rz(e||!r||!r.__esModule?fb(t,"default",{value:r,enumerable:!0}):t,r));var _0=Kt((Vne,N0)=>{N0.exports=wt;var Oo=null;try{Oo=new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([0,97,115,109,1,0,0,0,1,13,2,96,0,1,127,96,4,127,127,127,127,1,127,3,7,6,0,1,1,1,1,1,6,6,1,127,1,65,0,11,7,50,6,3,109,117,108,0,1,5,100,105,118,95,115,0,2,5,100,105,118,95,117,0,3,5,114,101,109,95,115,0,4,5,114,101,109,95,117,0,5,8,103,101,116,95,104,105,103,104,0,0,10,191,1,6,4,0,35,0,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,126,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,127,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,128,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,129,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,130,34,4,66,32,135,167,36,0,32,4,167,11])),{}).exports}catch(r){}function 
wt(r,e,t){this.low=r|0,this.high=e|0,this.unsigned=!!t}wt.prototype.__isLong__;Object.defineProperty(wt.prototype,"__isLong__",{value:!0});function Br(r){return(r&&r.__isLong__)===!0}wt.isLong=Br;var y0={},b0={};function eu(r,e){var t,o,n;return e?(r>>>=0,(n=0<=r&&r<256)&&(o=b0[r],o)?o:(t=St(r,(r|0)<0?-1:0,!0),n&&(b0[r]=t),t)):(r|=0,(n=-128<=r&&r<128)&&(o=y0[r],o)?o:(t=St(r,r<0?-1:0,!1),n&&(y0[r]=t),t))}wt.fromInt=eu;function Mo(r,e){if(isNaN(r))return e?Ji:Lo;if(e){if(r<0)return Ji;if(r>=S0)return T0}else{if(r<=-I0)return Lr;if(r+1>=I0)return k0}return r<0?Mo(-r,e).neg():St(r%Dp|0,r/Dp|0,e)}wt.fromNumber=Mo;function St(r,e,t){return new wt(r,e,t)}wt.fromBits=St;var Wm=Math.pow;function _b(r,e,t){if(r.length===0)throw Error("empty string");if(r==="NaN"||r==="Infinity"||r==="+Infinity"||r==="-Infinity")return Lo;if(typeof e=="number"?(t=e,e=!1):e=!!e,t=t||10,t<2||36<t)throw RangeError("radix");var o;if((o=r.indexOf("-"))>0)throw Error("interior hyphen");if(o===0)return _b(r.substring(1),e,t).neg();for(var n=Mo(Wm(t,8)),s=Lo,a=0;a<r.length;a+=8){var i=Math.min(8,r.length-a),p=parseInt(r.substring(a,a+i),t);if(i<8){var u=Mo(Wm(t,i));s=s.mul(u).add(Mo(p))}else s=s.mul(n),s=s.add(Mo(p))}return s.unsigned=e,s}wt.fromString=_b;function ts(r,e){return typeof r=="number"?Mo(r,e):typeof r=="string"?_b(r,e):St(r.low,r.high,typeof e=="boolean"?e:r.unsigned)}wt.fromValue=ts;var C0=1<<16,vz=1<<24,Dp=C0*C0,S0=Dp*Dp,I0=S0/2,w0=eu(vz),Lo=eu(0);wt.ZERO=Lo;var Ji=eu(0,!0);wt.UZERO=Ji;var Fp=eu(1);wt.ONE=Fp;var v0=eu(1,!0);wt.UONE=v0;var Nb=eu(-1);wt.NEG_ONE=Nb;var k0=St(-1,2147483647,!1);wt.MAX_VALUE=k0;var T0=St(-1,-1,!0);wt.MAX_UNSIGNED_VALUE=T0;var Lr=St(0,-2147483648,!1);wt.MIN_VALUE=Lr;var ce=wt.prototype;ce.toInt=function(){return this.unsigned?this.low>>>0:this.low};ce.toNumber=function(){return this.unsigned?(this.high>>>0)*Dp+(this.low>>>0):this.high*Dp+(this.low>>>0)};ce.toString=function(e){if(e=e||10,e<2||36<e)throw 
RangeError("radix");if(this.isZero())return"0";if(this.isNegative())if(this.eq(Lr)){var t=Mo(e),o=this.div(t),n=o.mul(t).sub(this);return o.toString(e)+n.toInt().toString(e)}else return"-"+this.neg().toString(e);for(var s=Mo(Wm(e,6),this.unsigned),a=th
`),V=F=>M.writeSync(2,F+`
`));var G=u.print||W,q=u.printErr||V;Object.assign(u,f),f=null,u.arguments&&(d=u.arguments),u.thisProgram&&(h=u.thisProgram),u.quit&&(g=u.quit);var H=4,j=Atomics.load,Y=Atomics.store,Z=Atomics.compareExchange,ee;u.wasmBinary&&(ee=u.wasmBinary);var X=u.noExitRuntime||!0;typeof WebAssembly!="object"&&Xu("no native wasm support detected");var Q,se,ie=!1,de;function Ie(F,B){F||Xu(B)}var Se=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):void 0;function Ee(F,B,re){for(var le=B+re,Te=B;F[Te]&&!(Te>=le);)++Te;if(Te-B>16&&F.buffer&&Se)return Se.decode(F.buffer instanceof SharedArrayBuffer?F.slice(B,Te):F.subarray(B,Te));for(var Ze="";B<Te;){var $e=F[B++];if(!($e&128)){Ze+=String.fromCharCode($e);continue}var Pe=F[B++]&63;if(($e&224)==192){Ze+=String.fromCharCode(($e&31)<<6|Pe);continue}var Wt=F[B++]&63;if(($e&240)==224?$e=($e&15)<<12|Pe<<6|Wt:$e=($e&7)<<18|Pe<<12|Wt<<6|F[B++]&63,$e<65536)Ze+=String.fromCharCode($e);else{var Zr=$e-65536;Ze+=String.fromCharCode(55296|Zr>>10,56320|Zr&1023)}}return Ze}function Me(F,B){return F?Ee(o(),F,B):""}function st(F,B,re,le){if(!(le>0))return 0;for(var Te=re,Ze=re+le-1,$e=0;$e<F.length;++$e){var Pe=F.charCodeAt($e);if(Pe>=55296&&Pe<=57343){var Wt=F.charCodeAt(++$e);Pe=65536+((Pe&1023)<<10)|Wt&1023}if(Pe<=127){if(re>=Ze)break;B[re++]=Pe}else if(Pe<=2047){if(re+1>=Ze)break;B[re++]=192|Pe>>6,B[re++]=128|Pe&63}else if(Pe<=65535){if(re+2>=Ze)break;B[re++]=224|Pe>>12,B[re++]=128|Pe>>6&63,B[re++]=128|Pe&63}else{if(re+3>=Ze)break;B[re++]=240|Pe>>18,B[re++]=128|Pe>>12&63,B[re++]=128|Pe>>6&63,B[re++]=128|Pe&63}}return B[re]=0,re-Te}function pt(F,B,re){return st(F,o(),B,re)}var De,ft,at,dt,It,Fr,Pt,jr,er;w&&(De=u.buffer);function Tt(F){De=F,u.HEAP8=ft=new Int8Array(F),u.HEAP16=dt=new Int16Array(F),u.HEAP32=Fr=new Int32Array(F),u.HEAPU8=at=new Uint8Array(F),u.HEAPU16=It=new Uint16Array(F),u.HEAPU32=Pt=new Uint32Array(F),u.HEAPF32=jr=new Float32Array(F),u.HEAPF64=er=new Float64Array(F)}var 
tr=u.INITIAL_MEMORY||16777216;if(w)Q=u.wasmMemory,De=u.buffer;else if(u.wasmMemory)Q=u.wasmMemory;else if(Q=new WebAssembly.Memory({initial:tr/65536,maximum:32768,shared:!0}),!(Q.buffer instanceof SharedArrayBuffer))throw q("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),C&&console.log("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and also use a recent version)"),Error("bad memory");Q&&(De=Q.buffer),tr=De.byteLength,Tt(De);var rr,Xr=[],Yr=[],pr=[],Qs=!1;function tn(){return X}function Ua(){if(u.preRun)for(typeof u.preRun=="function"&&(u.preRun=[u.preRun]);u.preRun.length;)Lc(u.preRun.shift());Uc(Xr)}function jt(){Qs=!0,!w&&Uc(Yr)}function Zs(){if(!w){if(u.postRun)for(typeof u.postRun=="function"&&(u.postRun=[u.postRun]);u.postRun.length;)KS(u.postRun.shift());Uc(pr)}}function Lc(F){Xr.unshift(F)}function Bc(F){Yr.unshift(F)}function KS(F){pr.unshift(F)}var Ga=0,ju=null,Js=null;function jS(F){Ga++,u.monitorRunDependencies&&u.monitorRunDependencies(Ga)}function XS(F){if(Ga--,u.monitorRunDependencies&&u.monitorRunDependencies(Ga),Ga==0&&(ju!==null&&(clearInterval(ju),ju=null),Js)){var B=Js;Js=null,B()}}function Xu(F){w?postMessage({cmd:"onAbort",arg:F}):u.onAbort&&u.onAbort(F),F="Aborted("+F+")",q(F),ie=!0,de=1,F+=". 
Build with -sASSERTIONS for more info.";var B=new WebAssembly.RuntimeError(F);throw l(B),B}var $x="data:application/octet-stream;base64,";function nm(F){return F.startsWith($x)}function Vc(F){return F.startsWith("file://")}var dr;dr="tfjs-backend-wasm-threaded-simd.wasm",nm(dr)||(dr=_(dr));function sm(F){try{if(F==dr&&ee)return new Uint8Array(ee);if(A)return A(F);throw"both async and sync fetching of the wasm failed"}catch(B){Xu(B)}}function Rx(){if(!ee&&(y||b)){if(typeof fetch=="function"&&!Vc(dr))return fetch(dr,{credentials:"same-origin"}).then(function(F){if(!F.ok)throw"failed to load wasm binary file at '"+dr+"'";return F.arrayBuffer()}).catch(function(){return sm(dr)});if(R)return new Pr
");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}WasmBackendModuleThreadedSimd(Module).then(function(instance){Module=instance})}else if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command 
"+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};`});var V3=Kt((Dg,tS)=>{var eS=(()=>{var r=typeof document!="undefined"&&document.currentScript?document.currentScript.src:void 0;return typeof __filename!="undefined"&&(r=r||__filename),function(e){e=e||{};var t=typeof e!="undefined"?e:{},o,n;t.ready=new Promise(function(U,te){o=U,n=te});var s;typeof process!="undefined"&&process.listeners&&(s={uncaughtException:process.listeners("uncaughtException"),unhandledRejection:process.listeners("unhandledRejection")});var a=Object.assign({},t),i=[],p="./this.program",u=(U,te)=>{throw te},c=typeof window=="object",l=typeof importScripts=="function",m=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string",f="";function d(U){return t.locateFile?t.locateFile(U,f):f+U}var h,g,y,b;function C(U){if(U instanceof ju)return;E("exiting due to exception: "+U)}if(m){l?f=Vl().dirname(f)+"/":f=__dirname+"/";var w,k;typeof Em=="function"&&(w=Qw(),k=Vl()),h=(U,te)=>(U=k.normalize(U),w.readFileSync(U,te?void 0:"utf8")),y=U=>{var te=h(U,!0);return te.buffer||(te=new Uint8Array(te)),te},g=(U,te,ve)=>{U=k.normalize(U),w.readFile(U,function(Ke,Nt){Ke?ve(Ke):te(Nt.buffer)})},process.argv.length>1&&(p=process.argv[1].replace(/\\/g,"/")),i=process.argv.slice(2),process.on("uncaughtException",function(U){if(!(U instanceof ju))throw U}),process.on("unhandledRejection",function(U){throw U}),u=(U,te)=>{if(at())throw process.exitCode=U,te;C(te),process.exit(U)},t.inspect=function(){return"[Emscripten Module object]"}}else(c||l)&&(l?f=self.location.href:typeof document!="undefined"&&document.currentScript&&(f=document.currentScript.src),r&&(f=r),f.indexOf("blob:")!==0?f=f.substr(0,f.replace(/[?#].*/,"").lastIndexOf("/")+1):f="",h=U=>{var te=new XMLHttpRequest;return te.open("GET",U,!1),te.send(null),te.responseText},l&&(y=U=>{var te=new XMLHttpRequest;return 
te.open("GET",U,!1),te.responseType="arraybuffer",te.send(null),new Uint8Array(te.response)}),g=(U,te,ve)=>{var Ke=new XMLHttpRequest
`)),p.join(`
`)}function Dz(r,e,t,o){let n=Ve(e),s=o[o.length-1],a=new Array(s).fill(0),i=e.length,p=t==="complex64"?nl(r):r;if(i>1)for(let u=0;u<n/s;u++){let c=u*s;for(let l=0;l<s;l++)a[l]=Math.max(a[l],ol(p[c+l],0,t).length)}return a}function ol(r,e,t){let o;return Array.isArray(r)?o=`${parseFloat(r[0].toFixed(Ab))} + ${parseFloat(r[1].toFixed(Ab))}j`:nn(r)?o=`'${r}'`:t==="bool"?o=O0(r):o=parseFloat(r.toFixed(Ab)).toString(),Mi(o,e)}function O0(r){return r===0?"false":"true"}function Hm(r,e,t,o,n,s=!0){let a=t==="complex64"?2:1,i=e[0],p=e.length;if(p===0){if(t==="complex64"){let h=nl(r);return[ol(h[0],0,t)]}return t==="bool"?[O0(r[0])]:[r[0].toString()]}if(p===1){if(i>D0){let g=rl*a,y=Array.from(r.slice(0,g)),b=Array.from(r.slice((i-rl)*a,i*a));return t==="complex64"&&(y=nl(y),b=nl(b)),["["+y.map((C,w)=>ol(C,n[w],t)).join(", ")+", ..., "+b.map((C,w)=>ol(C,n[i-rl+w],t)).join(", ")+"]"]}return["["+(t==="complex64"?nl(r):Array.from(r)).map((g,y)=>ol(g,n[y],t)).join(", ")+"]"]}let u=e.slice(1),c=o.slice(1),l=o[0]*a,m=[];if(i>D0){for(let h=0;h<rl;h++){let g=h*l,y=g+l;m.push(...Hm(r.slice(g,y),u,t,c,n,!1))}m.push("...");for(let h=i-rl;h<i;h++){let g=h*l,y=g+l;m.push(...Hm(r.slice(g,y),u,t,c,n,h===i-1))}}else for(let h=0;h<i;h++){let g=h*l,y=g+l;m.push(...Hm(r.slice(g,y),u,t,c,n,h===i-1))}let f=p===2?",":"";m[0]="["+m[0]+f;for(let h=1;h<m.length-1;h++)m[h]=" "+m[h]+f;let d=`,
`;for(let h=2;h<p;h++)d+=`
`;return m[m.length-1]=" "+m[m.length-1]+"]"+(s?"":d),m}function nl(r){let e=[];for(let t=0;t<r.length;t+=2)e.push([r[t],r[t+1]]);return e}var je=class{constructor(e,t,o){if(this.dtype=t,this.shape=e.slice(),this.size=Ve(e),o!=null){let n=o.length;$(n===this.size,()=>`Length of values '${n}' does not match the size inferred by the shape '${this.size}'.`)}if(t==="complex64")throw new Error("complex64 dtype TensorBuffers are not supported. Please create a TensorBuffer for the real and imaginary parts separately and call tf.complex(real, imag).");this.values=o||gb(t,this.size),this.strides=ds(e)}set(e,...t){t.length===0&&(t=[0]),$(t.length===this.rank,()=>`The number of provided coordinates (${t.length}) must match the rank (${this.rank})`);let o=this.locToIndex(t);this.values[o]=e}get(...e){e.length===0&&(e=[0]);let t=0;for(let n of e){if(n<0||n>=this.shape[t]){let s=`Requested out of range element at ${e}. Buffer shape=${this.shape}`;throw new Error(s)}t++}let o=e[e.length-1];for(let n=0;n<e.length-1;++n)o+=this.strides[n]*e[n];return this.values[o]}locToIndex(e){if(this.rank===0)return 0;if(this.rank===1)return e[0];let t=e[e.length-1];for(let o=0;o<e.length-1;++o)t+=this.strides[o]*e[o];return t}indexToLoc(e){if(this.rank===0)return[];if(this.rank===1)return[e];let t=new Array(this.shape.length);for(let o=0;o<t.length-1;++o)t[o]=Math.floor(e/this.strides[o]),e-=t[o]*this.strides[o];return t[t.length-1]=e,t}get rank(){return this.shape.length}toTensor(){return rs().makeTensor(this.values,this.shape,this.dtype)}},rs=null,Mp=null,Pz=null;function M0(r){rs=r}function L0(r){Mp=r}function B0(r){Pz=r}var ut=class{constructor(e,t,o,n){this.kept=!1,this.isDisposedInternal=!1,this.shape=e.slice(),this.dtype=t||"float32",this.size=Ve(e),this.strides=ds(e),this.dataId=o,this.id=n,this.rankType=this.rank<5?this.rank.toString():"higher"}get rank(){return this.shape.length}async buffer(){let e=await this.data();return Mp.buffer(this.shape,this.dtype,e)}bufferSync(){return 
Mp.buffer(this.shape,this.dtype,this.dataSync())}async array(){let e=await this.data();return Oi(this.shape,e,this.dtype==="complex64")}arraySync(){return Oi(this.shape,this.dataSync(),this.dtype==="complex64")}async data(){this.throwIfDisposed();let e=rs().read(this.dataId);if(this.dtype==="string"){let t=await e;try{return t.map(o=>Op(o))}catch(o){throw new Error("Failed to decode the string bytes into utf-8. To get the original bytes, call tensor.bytes().")}}return e}dataToGPU(e){return this.throwIfDisposed(),rs().readToGPU(this.dataId,e)}dataSync(){this.throwIfDisposed();let e=rs().readSync(this.dataId);if(this.dtype==="string")try{return e.map(t=>Op(t))}catch(t){throw new Error("Failed to decode the string bytes into utf-8. To get the original bytes, call tensor.bytes().")}return e}async bytes(){this.throwIfDisposed();let e=await rs().read(this.dataId);return this.dtype==="string"?e:new Uint8Array(e.buffer)}dispose(){this.isDisposed||(rs().disposeTensor(this),this.isDisposedInternal=!0)}get isDisposed(){return this.isDisposedInternal}throwIfDisposed(){if(this.isDisposed)throw new Error("Tensor is disposed.")}print(e=!1){return Mp.print(this,e)}clone(){return this.throwIfDisposed(),Mp.clone(this)}toString(e=!1){let t=this.dataSync();return P0(t,this.shape,this.dtype,e)}cast(e){return this.throwIfDisposed(),Mp.cast(this,e)}variable(e=!0,t,o){return this.throwIfDisposed(),rs().makeVariable(this,e,t,o)}};Object.defineProperty(ut,Symbol.hasInstance,{value:r=>!!r&&r.data!=null&&r.dataSync!=null&&r.throwIfDisposed!=null});function Oz(){return Zc("Tensor",()=>ut)}Oz();var ba=class extends ut{constructor(e,t,o,n){super(e.shape,e.dtype,e.dataId,n),this.trainable=t,this.name=o}assign(e){if(e.dtype!==this.dtype)throw new Error(`dtype of the new value (${e.dtype}) and previous value (${this.dtype}) must match`);if(!Or(e.shape,this.shape))throw new Error(`shape of the new value (${e.shape}) and previous value (${this.shape}) must 
match`);rs().disposeTensor(this),this.dataId=e.dataId,rs().incRef(this,null)}dispose(){rs().disposeVariable(this),this.isDisposedInternal=!0}};Ob
Manifest JSON has weights with names: ${i.join(", ")}.`)}let p=n.reduce((f,d,h)=>(d&&f.push(h),f),[]),u=[];p.forEach(f=>{e[f].paths.forEach(d=>{let h=t+(t.endsWith("/")?"":"/")+d;u.push(h)})});let c=await r(u),l={},m=0;return p.forEach(f=>{let d=e[f].paths.length,h=0;for(let w=0;w<d;w++)h+=c[m+w].byteLength;let g=new ArrayBuffer(h),y=new Uint8Array(g),b=0;for(let w=0;w<d;w++){let k=new Uint8Array(c[m+w]);y.set(k,b),b+=k.byteLength}s[f].forEach(w=>{let k=g.slice(w.groupOffset,w.groupOffset+w.sizeBytes),_=jm(k,[w.manifestEntry]);for(let E in _)l[E]=_[E]}),m+=d}),l}}var hW="application/octet-stream",gW="application/json",il=class{constructor(e,t){if(this.DEFAULT_METHOD="POST",t==null&&(t={}),this.weightPathPrefix=t.weightPathPrefix,this.onProgress=t.onProgress,this.weightUrlConverter=t.weightUrlConverter,t.fetchFunc!=null?($(typeof t.fetchFunc=="function",()=>"Must pass a function that matches the signature of `fetch` (see https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API)"),this.fetch=t.fetchFunc):this.fetch=P().platform.fetch,$(e!=null&&e.length>0,()=>"URL path for http must not be null, undefined or empty."),Array.isArray(e)&&$(e.length===2,()=>`URL paths for http must have a length of 2, (actual length is ${e.length}).`),this.path=e,t.requestInit!=null&&t.requestInit.body!=null)throw new Error("requestInit is expected to have no pre-existing body, but has one.");this.requestInit=t.requestInit||{}}async save(e){if(e.modelTopology instanceof ArrayBuffer)throw new Error("BrowserHTTPRequest.save() does not support saving model topology in binary formats yet.");let t=Object.assign({method:this.DEFAULT_METHOD},this.requestInit);t.body=new FormData;let o=[{paths:["./model.weights.bin"],weights:e.weightSpecs}],n=Xm(e,o);t.body.append("model.json",new Blob([JSON.stringify(n)],{type:gW}),"model.json"),e.weightData!=null&&t.body.append("model.weights.bin",new Blob([e.weightData],{type:hW}),"model.weights.bin");let s=await 
this.fetch(this.path,t);if(s.ok)return{modelArtifactsInfo:As(e),responses:[s]};throw new Error(`BrowserHTTPRequest.save() failed due to HTTP response status ${s.status}.`)}async load(){let e=await this.fetch(this.path,this.requestInit);if(!e.ok)throw new Error(`Request to ${this.path} failed with status code ${e.status}. Please verify this URL points to the model JSON of the model to load.`);let t;try{t=await e.json()}catch(s){let a=`Failed to parse model JSON of response from ${this.path}.`;throw this.path.endsWith(".pb")?a+=" Your path contains a .pb file extension. Support for .pb models have been removed in TensorFlow.js 1.0 in favor of .json models. You can re-convert your Python TensorFlow model using the TensorFlow.js 1.0 conversion scripts or you can convert your.pb models with the 'pb2json'NPM script in the tensorflow/tfjs-converter repository.":a+=" Please make sure the server is serving valid JSON for this request.",new Error(a)}let o=t.modelTopology,n=t.weightsManifest;if(o==null&&n==null)throw new Error(`The JSON from HTTP path ${this.path} contains neither model topology or manifest for weights.`);return Bp(t,s=>this.loadWeights(s))}async loadWeights(e){let t=Array.isArray(this.path)?this.path[1]:this.path,[o,n]=xW(t),s=this.weightPathPrefix||o,a=Ym(e),i=[],p=[];for(let c of e)for(let l of c.paths)this.weightUrlConverter!=null?p.push(this.weightUrlConverter(l)):i.push(s+l+n);this.weightUrlConverter&&i.push(...await Promise.all(p));let u=await tC(i,{requestInit:this.requestInit,fetchFunc:this.fetch,onProgress:this.onProgress});return[a,Lp(u)]}};il.URL_SCHEME_REGEX=/^https?:\/\//;function xW(r){let e=r.lastIndexOf("/"),t=r.lastIndexOf("?"),o=r.substring(0,e),n=t>e?r.substring(t):"";return[o+"/",n]}function tf(r){return r.match(il.URL_SCHEME_REGEX)!=null}var dv=(r,e)=>{if(typeof fetch=="undefined"&&(e==null||e.fetchFunc==null))return null;{let t=!0;if(Array.isArray(r)?t=r.every(o=>tf(o)):t=tf(r),t)return rf(r,e)}return 
null};mt.registerSaveRouter(dv);mt.registerLoadRouter(dv);function rf(r,e){return new il(r,e)}function hv(r,e){return rf(r,e)}var ul=class{constructor(e){this.modelArtifacts=e}
Actual: ${n}.
Expected: ${s}.`);for(let a=0;a<s.length;++a){let i=n[a],p=s[a];if(!t(i,p))throw new Error(`Arrays differ: actual[${a}] = ${i}, expected[${a}] = ${p}.
Actual: ${n}.
Expected: ${s}.`)}typeof expect!="undefined"&&expect().nothing()}function GW(r,e){r().then(()=>e.fail(),()=>e()),typeof expect!="undefined"&&expect().nothing()}function HW(r,e){let t=typeof e=="string"||typeof e=="number"||typeof e=="boolean"?[e]:e;return nn(r)||nn(r[0])||nn(e)||nn(e[0])?uC(r,t,(o,n)=>o==n):uC(r,e,(o,n)=>pC(o,n,0))}function Mv(r,e,t){if(t==null&&(t=pf()),!pC(r,e,t))throw new Error(`Numbers differ: actual === ${r}, expected === ${e}`);typeof expect!="undefined"&&expect().nothing()}function pC(r,e,t){return!isFinite(r)&&!isFinite(e)?!0:!(isNaN(r)||isNaN(e)||Math.abs(r-e)>t)}function qW(r,e,t){for(let o=0;o<r.length;o++)if(r[o]<e||r[o]>t)throw new Error(`Value out of range:${r[o]} low: ${e}, high: ${t}`)}function KW(r,e){let t=new Float32Array(r),o=new Float32Array(e);if(t.length!==o.length)throw new Error(`Expected ArrayBuffer to be of length ${o.length}, but it was ${t.length}`);for(let n=0;n<o.length;n++)if(t[n]!==o[n])throw new Error(`Expected ArrayBuffer value at ${n} to be ${o[n]} but got ${t[n]} instead`)}function Lv(r){for(let e=0;e<r.length;e++){let t=r[e];Array.isArray(t)?Lv(t):r[e]=si(t)}return r}function jW(r){let e=document.createElement("video");return"playsInline"in e&&(e.playsInline=!0),e.muted=!0,e.loop=!0,e.style.position="fixed",e.style.left="0px",e.style.top="0px",e.preload="auto",e.appendChild(r),new Promise(t=>{e.addEventListener("loadeddata",o=>t(e)),e.load()})}async function XW(r){await r.play(),"requestVideoFrameCallback"in r&&await new Promise(e=>{r.requestVideoFrameCallback(e)})}var YW="4.0.0";function QW(r,e){let t=v(r,"a","add"),o=v(e,"b","add");[t,o]=Re(t,o);let n={a:t,b:o};return N.runKernel(_r,n)}var ge=T({add_:QW});function ZW(r,e){let t=v(r,"a","floorDiv"),o=v(e,"b","floorDiv");[t,o]=Re(t,o);let n={a:t,b:o};return N.runKernel(vn,n)}var cf=T({floorDiv_:ZW});function JW(r,e){let t=v(r,"a","div"),o=v(e,"b","div");if([t,o]=Re(t,o),t.dtype==="int32"&&o.dtype==="int32")return cf(t,o);let n={a:t,b:o},s={};return 
N.runKernel(Cn,n,s)}var We=T({div_:JW});function eU(r,e){let t=v(r,"a","mul"),o=v(e,"b","mul");[t,o]=Re(t,o);let n={a:t,b:o};return N.runKernel(ho,n)}var oe=T({mul_:eU});function tU(r){let e=v(r,"x","abs");if(e.dtype==="complex64"){let t={x:e};return N.runKernel(cp,t)}else{let t={x:e};return N.runKernel(sn,t)}}var Qt=T({abs_:tU});function rU(r){let t={x:v(r,"x","acos")};return N.runKernel(Li,t)}var Vv=T({acos_:rU});function oU(r){let t={x:v(r,"x","acosh")};return N.runKernel(Bi,t)}var zv=T({acosh_:oU});function nU(r){$(Array.isArray(r),()=>"The argument passed to tf.addN() must be a list of tensors"),$(r.length>=1,()=>`Must pass at least one tensor to tf.addN(), but got ${r.length}`);let e=r.map((n,s)=>v(n,`tensors${s}`,"addN")),t=e[0];e.forEach(n=>{if(n.dtype!==t.dtype)throw new Error("All tensors passed to tf.addN() must have the same dtype")}),e.forEach(n=>{if(!Or(n.shape,t.shape))throw new Error("All tensors passed to tf.addN() must have the same shape")});let o=e;return N.runKernel(an,o)}var Wv=T({addN_:nU});function sU(r,e=null,t=!1){let n={x:v(r,"x","all","bool")},s={axis:e,keepDims:t};return N.runKernel(oa,n,s)}var Uv=T({all_:sU});function aU(r,e=null,t=!1){let n={x:v(r,"x","any","bool")},s={axis:e,keepDims:t};return N.runKernel(na,n,s)}var Gv=T({any_:aU});function iU(r,e=0){let o={x:v(r,"x","argMax")},n={axis:e};return N.runKernel(un,o,n)}var Hv=T({argMax_:iU});function uU(r,e=0){let o={x:v(r,"x","argMin")},n={axis:e};return N.runKernel(ja,o,n)}var qv=T({argMin_:uU});function pU(r){let t={x:v(r,"x","asin")};return N.runKernel(Vi,t)}var Kv=T({asin_:pU});function cU(r){let t={x:v(r,"x","asinh")};return N.runKernel(zi,t)}var jv=T({asinh_:cU});function lU(r){let t={x:v(r,"x","atan")};return N.runKernel(Wi,t)}var Xv=T({atan_:lU});function mU(r,e){let t=v(r,"a","atan2"),o=v(e,"b","atan2");[t,o]=Re(t,o);let n={a:t,b:o};return N.runKernel(sa,n)}var Yv=T({atan2_:mU});function fU(r){let t={x:v(r,"x","atanh")};return N.runKernel(Ui,t)}var Qv=T({atanh_:fU});function 
dU(r,e,t,o,n="NHWC",s){let a=r[3],i=[...e,a],p=Jv(n);return uu(r,i,t,s,o,null,null,p)}function lC(r,e,t,o,n,s,
with dtype ${s.dtype}. `)}),t.length===1)return zr(t[0]);let o=t,n={axis:e};return N.runKernel(gs,o,n)}var gt=T({concat_:SU});function vU(r){let t={x:v(r,"x","sigmoid","float32")};return N.runKernel(yo,t)}var Ms=T({sigmoid_:vU});function kU(r,e,t){let o=v(r,"x","slice","string_or_numeric");if(o.rank===0)throw new Error("Slicing scalar is not possible");let n={x:o},s={begin:e,size:t};return N.runKernel(qn,n,s)}var Ue=T({slice_:kU});function TU(r){let t={x:v(r,"x","tanh","float32")};return N.runKernel(Qn,t)}var ml=T({tanh_:TU});function NU(r,e,t,o,n,s){let a=v(r,"forgetBias","basicLSTMCell"),i=v(e,"lstmKernel","basicLSTMCell"),p=v(t,"lstmBias","basicLSTMCell"),u=v(o,"data","basicLSTMCell"),c=v(n,"c","basicLSTMCell"),l=v(s,"h","basicLSTMCell"),m=gt([u,l],1),f=Xe(m,i),d=ge(f,p),h=d.shape[0],g=d.shape[1]/4,y=[h,g],b=Ue(d,[0,0],y),C=Ue(d,[0,g],y),w=Ue(d,[0,g*2],y),k=Ue(d,[0,g*3],y),_=ge(oe(Ms(b),ml(C)),oe(c,Ms(ge(a,w)))),E=oe(ml(_),Ms(k));return[_,E]}var tk=T({basicLSTMCell_:NU});function _U(r,e,t){let o=v(r,"x","batchToSpaceND"),n=e.reduce((i,p)=>i*p);$(o.rank>=1+e.length,()=>`input rank is ${o.rank} but should be > than blockShape.length ${e.length}`),$(t.length===e.length,()=>`crops.length is ${t.length} but should be equal to blockShape.length ${e.length}`),$(o.shape[0]%n===0,()=>`input tensor batch is ${o.shape[0]} but is not divisible by the product of the elements of blockShape ${e.join(" * ")} === ${n}`);let s={x:o},a={blockShape:e,crops:t};return N.runKernel(hs,s,a)}var ff=T({batchToSpaceND_:_U});function rk(r){let e;return r.rank===0||r.rank===1?e=z(r,[1,1,1,r.size]):r.rank===2?e=z(r,[1,1,r.shape[0],r.shape[1]]):r.rank===3?e=z(r,[1,r.shape[0],r.shape[1],r.shape[2]]):e=r,e}function EU(r,e,t,o,n,s){s==null&&(s=.001);let a=v(r,"x","batchNorm"),i=v(e,"mean","batchNorm"),p=v(t,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;o!=null&&(c=v(o,"offset","batchNorm")),$(i.rank===p.rank,()=>"Batch normalization gradient requires mean and variance to 
have equal ranks."),$(c==null||i.rank===c.rank,()=>"Batch normalization gradient requires mean and offset to have equal ranks."),$(u==null||i.rank===u.rank,()=>"Batch normalization gradient requires mean and scale to have equal ranks.");let m={x:rk(a),scale:u,offset:c,mean:i,variance:p},f={varianceEpsilon:s},d=N.runKernel(kn,m,f);return z(d,a.shape)}var li=T({batchNorm_:EU});function $U(r,e,t,o,n,s){let a=v(r,"x","batchNorm"),i=v(e,"mean","batchNorm"),p=v(t,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;return o!=null&&(c=v(o,"offset","batchNorm")),$(a.rank===2,()=>`Error in batchNorm2D: x must be rank 2 but got rank ${a.rank}.`),$(i.rank===2||i.rank===1,()=>`Error in batchNorm2D: mean must be rank 2 or rank 1 but got rank ${i.rank}.`),$(p.rank===2||p.rank===1,()=>`Error in batchNorm2D: variance must be rank 2 or rank 1 but got rank ${p.rank}.`),u!=null&&$(u.rank===2||u.rank===1,()=>`Error in batchNorm2D: scale must be rank 2 or rank 1 but got rank ${u.rank}.`),c!=null&&$(c.rank===2||c.rank===1,()=>`Error in batchNorm2D: offset must be rank 2 or rank 1 but got rank ${c.rank}.`),li(a,i,p,c,u,s)}var ok=T({batchNorm2d_:$U});function RU(r,e,t,o,n,s){let a=v(r,"x","batchNorm"),i=v(e,"mean","batchNorm"),p=v(t,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;return o!=null&&(c=v(o,"offset","batchNorm")),$(a.rank===3,()=>`Error in batchNorm3D: x must be rank 3 but got rank ${a.rank}.`),$(i.rank===3||i.rank===1,()=>`Error in batchNorm3D: mean must be rank 3 or rank 1 but got rank ${i.rank}.`),$(p.rank===3||p.rank===1,()=>`Error in batchNorm3D: variance must be rank 3 or rank 1 but got rank ${p.rank}.`),u!=null&&$(u.rank===3||u.rank===1,()=>`Error in batchNorm3D: scale must be rank 3 or rank 1 but got rank ${u.rank}.`),c!=null&&$(c.rank===3||c.rank===1,()=>`Error in batchNorm3D: offset must be rank 3 or rank 1 but got rank ${c.rank}.`),li(a,i,p,c,u,s)}var nk=T({batchNorm3d_:RU});function AU(r,e,t,o,n,s){let 
a=v(r,"x","batchNorm"),i=v(e,"mean","batchNorm"),p=v(t,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batch
${n} and ${e} for depthToSpace with input shape
${o.shape}`),$(s*e>=0,()=>`Negative dimension size caused by overflow when multiplying
${s} and ${e} for depthToSpace with input shape
${o.shape}`),$(a%(e*e)===0,()=>`Dimension size must be evenly divisible by ${e*e} but is ${a} for depthToSpace with input shape ${o.shape}`);let i={x:o},p={blockSize:e,dataFormat:t};return N.runKernel(yn,i,p)}var Sk=T({depthToSpace_:eG});function tG(r,e,t,o,n="NHWC",s=[1,1],a){let i=v(r,"x","depthwiseConv2d","float32"),p=v(e,"filter","depthwiseConv2d","float32"),u=i,c=!1;i.rank===3&&(c=!0,u=z(i,[1,i.shape[0],i.shape[1],i.shape[2]])),$(u.rank===4,()=>`Error in depthwiseConv2d: input must be rank 4, but got rank ${u.rank}.`),$(p.rank===4,()=>`Error in depthwiseConv2d: filter must be rank 4, but got rank ${p.rank}.`);let l=n==="NHWC"?u.shape[3]:u.shape[1];$(l===p.shape[2],()=>`Error in depthwiseConv2d: number of input channels (${l}) must match the inChannels dimension in filter ${p.shape[2]}.`),Ot("depthwiseConv2d",o,a);let m={x:u,filter:p},f={strides:t,pad:o,dataFormat:n,dilations:s,dimRoundingMode:a},d=N.runKernel(bn,m,f);return c?z(d,[d.shape[1],d.shape[2],d.shape[3]]):d}var Gp=T({depthwiseConv2d_:tG});function rG(r){let t={x:v(r,"x","diag")};return N.runKernel(xp,t)}var vk=T({diag_:rG});function oG(r,e,t,o,n=[1,1],s="NHWC"){let a=v(r,"x","dilation2d"),i=v(e,"filter","dilation2d");$(a.rank===3||a.rank===4,()=>`Error in dilation2d: input must be rank 3 or 4, but got rank ${a.rank}.`),$(i.rank===3,()=>`Error in dilation2d: filter must be rank 3, but got rank ${i.rank}.`),$(s==="NHWC",()=>`Error in dilation2d: Only NHWC is currently supported, but got dataFormat of ${s}`);let p=a,u=!1;a.rank===3&&(p=z(a,[1,a.shape[0],a.shape[1],a.shape[2]]),u=!0);let c={x:p,filter:i},l={strides:t,pad:o,dilations:n},m=N.runKernel(yp,c,l);return u?z(m,[m.shape[1],m.shape[2],m.shape[3]]):m}var kk=T({dilation2d_:oG});function nG(r,e){let t=v(r,"a","equal","string_or_numeric"),o=v(e,"b","equal","string_or_numeric");[t,o]=Re(t,o),Je(t.shape,o.shape);let n={a:t,b:o};return N.runKernel(oo,n)}var gf=T({equal_:nG});function sG(r,e,t){let 
o=v(e,"a","where"),n=v(t,"b","where"),s=v(r,"condition","where","bool"),a=Je(Je(s.shape,o.shape),n.shape),i=Ls(s,a),p=Ls(o,a),u=Ls(n,a),c={condition:i,t:p,e:u};return N.runKernel(vs,c)}var os=T({where_:sG});function aG(r){let t={x:v(r,"x","zerosLike")};return N.runKernel(Es,t)}var Gt=T({zerosLike_:aG});function iG(r,e){let t=v(r,"a","div"),o=v(e,"b","div");[t,o]=Re(t,o);let n=We(t,o),s=Gt(n),a=gf(o,s);return os(a,s,n)}var Tk=T({divNoNan_:iG});function uG(r,e){let t=v(r,"t1","dot"),o=v(e,"t2","dot");$((t.rank===1||t.rank===2)&&(o.rank===1||o.rank===2),()=>`Error in dot: inputs must all be rank 1 or 2, but got ranks ${t.rank} and ${o.rank}.`);let n=t.rank===1?t.size:t.shape[1],s=o.rank===1?o.size:o.shape[0];if($(n===s,()=>`Error in dot: inner dimensions of inputs must match, but got ${n} and ${s}.`),t.rank===1&&o.rank===1){let a=z(t,[1,-1]),i=z(o,[-1,1]),p=Xe(a,i);return z(p,[])}else if(t.rank===1&&o.rank===2){let a=z(t,[1,-1]),i=z(o,[o.shape[0],o.shape[1]]),p=Xe(a,i);return z(p,[p.size])}else if(t.rank===2&&o.rank===1){let a=z(o,[-1,1]),i=Xe(t,a);return z(i,[i.size])}else{let a=z(o,[o.shape[0],o.shape[1]]);return Xe(t,a)}}var Nk=T({dot_:uG});function pG(r,...e){let t=e.map((n,s)=>v(n,`tensors${s}`,"einsum")),o={equation:r};return N.runKernel(Xa,t,o)}var _k=T({einsum_:pG});function cG(r){let t={x:v(r,"x","elu","float32")};return N.runKernel(In,t)}var xf=T({elu_:cG});function lG(r){let e=v(r,"x","erf");$(e.dtype==="int32"||e.dtype==="float32",()=>"Input dtype must be `int32` or `float32`."),e.dtype==="int32"&&(e=qe(e,"float32"));let t={x:e};return N.runKernel(Gi,t)}var Ek=T({erf_:lG});function fC(r,e){for(let t=0;t<r.length;++t)if(r[r.length-t-1]!==e-1-t)return!1;return!0}function $k(r,e,t){let o=r.length+e.length,n=[],s=0,a=0;for(let i=0;i<o;i++)t.indexOf(i)===-1?n.push(r[s++]):n.push(e[a++]);return n}function mG(r,e){let t=[],o=r.length;for(let s=0;s<o;s++)e.indexOf(s)===-1&&t.push(r[s]);let n=e.map(s=>r[s]);return[t,n]}function Ta(r,e){let t=e.map(o=>1);return 
$k(r,t,e)}function fG(r,e,t){$(fC(e,t),()=>`${r} supports only inner-most axes for now. Got axes ${e} and rank-${t} input.`)}function dG(r,e){if(fC(r
rank ${s.rank}.`),$(ra(e),()=>`Error in localResponseNormalization: depthRadius must be an integer but got depthRadius ${e}.`);let a=s,i=!1;s.rank===3&&(i=!0,a=z(s,[1,s.shape[0],s.shape[1],s.shape[2]]));let p={x:a},u={depthRadius:e,bias:t,alpha:o,beta:n},c=N.runKernel(wp,p,u);return i?z(c,[c.shape[1],c.shape[2],c.shape[3]]):c}var Bk=T({localResponseNormalization_:zG});function WG(r){let t={x:v(r,"x","log","float32")};return N.runKernel(lo,t)}var Ea=T({log_:WG});function UG(r){let t={x:v(r,"x","log1p")};return N.runKernel(Ki,t)}var Sf=T({log1p_:UG});function GG(r){return $(fs(r),()=>"The f passed in grad(f) must be a function"),(e,t)=>{let o=v(e,"x","tf.grad","string_or_numeric"),n=t!=null?v(t,"dy","tf.grad"):null;return N.tidy(()=>{let{value:s,grads:a}=N.gradients(()=>r(o),[o],n);return n!=null&&ht(s.shape,n.shape,"The shape of dy passed in grad(f)(x, dy) must match the shape returned by f(x)"),vf(a),a[0]})}}function HG(r){return $(fs(r),()=>"The f passed in grads(f) must be a function"),(e,t)=>{$(Array.isArray(e),()=>"The args passed in grads(f)(args) must be an array of `Tensor`s or `TensorLike`s");let o=Ia(e,"args","tf.grads","string_or_numeric"),n=t!=null?v(t,"dy","tf.grads"):null;return N.tidy(()=>{let{value:s,grads:a}=N.gradients(()=>r(...o),o,n);return n!=null&&ht(s.shape,n.shape,"The shape of dy passed in grads(f)([x1,...], dy) must match the shape returned by f([x1,...])"),vf(a),a})}}function qG(r){return $(fs(r),()=>"The f passed in valueAndGrad(f) must be a function"),(e,t)=>{$(e instanceof ut,()=>"The x passed in valueAndGrad(f)(x) must be a tensor"),$(t==null||t instanceof ut,()=>"The dy passed in valueAndGrad(f)(x, dy) must be a tensor");let{grads:o,value:n}=N.gradients(()=>r(e),[e],t);return vf(o),{grad:o[0],value:n}}}function KG(r){return $(fs(r),()=>"The f passed in valueAndGrads(f) must be a function"),(e,t)=>{$(Array.isArray(e)&&e.every(n=>n instanceof ut),()=>"The args passed in valueAndGrads(f)(args) must be array of tensors"),$(t==null||t 
instanceof ut,()=>"The dy passed in valueAndGrads(f)(args, dy) must be a tensor");let o=N.gradients(()=>r(...e),e,t);return t!=null&&ht(o.value.shape,t.shape,"The shape of dy passed in valueAndGrads(f)([x1,...], dy) must match the shape returned by f([x1,...])"),vf(o.grads),o}}function dC(r,e){$(fs(r),()=>"The f passed in variableGrads(f) must be a function"),$(e==null||Array.isArray(e)&&e.every(u=>u instanceof ba),()=>"The varList passed in variableGrads(f, varList) must be an array of variables");let t=e!=null;if(!t){e=[];for(let u in N.registeredVariables)e.push(N.registeredVariables[u])}let o=t?e.filter(u=>!u.trainable):null,n=e.length;e=e.filter(u=>u.trainable),$(e.length>0,()=>`variableGrads() expects at least one of the input variables to be trainable, but none of the ${n} variables is trainable.`);let s=!0,{value:a,grads:i}=N.gradients(r,e,null,s);$(i.some(u=>u!=null),()=>"Cannot find a connection between any variable and the result of the loss function y=f(x). Please make sure the operations that use variables are inside the function f passed to minimize()."),$(a.rank===0,()=>`The f passed in variableGrads(f) must return a scalar, but it returned a rank-${a.rank} tensor`);let p={};return e.forEach((u,c)=>{i[c]!=null&&(p[u.name]=i[c])}),o!=null&&o.forEach(u=>p[u.name]=null),{value:a,grads:p}}function Cr(r){return N.customGrad(r)}function vf(r){if(r.filter(t=>t==null).length>0)throw new Error(`Cannot compute gradient of y=f(x) with respect to x. Make sure that
the f you passed encloses all operations that lead from x to y.`)}function jG(r){let t={x:v(r,"x","softplus")};return N.runKernel(Qi,t)}var kf=T({softplus_:jG});function XG(r){let e=v(r,"x","logSigmoid");return Cr(o=>({value:yr(kf(yr(o))),gradFunc:a=>oe(a,Ms(yr(o)))}))(e)}var Vk=T({logSigmoid_:XG});function YG(r,e){let t=v(r,"a","sub"),o=v(e,"b","sub");[t,o]=Re(t,o);let n={a:t,b:o};return N.runKernel(Io,n)}var ke=T({sub_:YG});function QG(r,e=-1){let t=v(r,"logits","logSoftmax");if(e===-1&&(e=t.rank-1),e!==t.rank-1)throw Error(`Log Softmax along a non-last dimension is not yet supported. Logits was rank ${t.rank} and axis was ${e}`);return Cr((n,s)=>{let i=Vs(n,e,!0),p=ke(n,i),u=ke(qe(p,"float32"),Ea(tt(Bo(p),e,!0)));return s([u]),{value:u,gradFunc:(l,m)=>{let[f]=m,d=!0,h=Bo(f);return ke(l,oe(tt(l,e,d),h))}}})(t)}var zk=T({logSoftmax_:QG});function ZG(r,e=null,t=!1){let o=v(r,"x","logSumExp"),n=Ka(e,o.shape),s=Vs(o,n,!0),a=ke(o,s),i=Bo(a),p=tt(i,n),u=Ea(p),c=ge(z(s,u.shape),u);if(t){let l=Ta(c.shape,n);return z(c,l)}return c}var Tf=T({logSumExp_:ZG});function JG(r,e){let t=v(r,"a","logicalAnd","bool"),o=v(e,"b","logicalAnd","bool");Je(t.shape,o.shape);let n={a:t,b:o};return N.runKernel(_n,n)}var lu=T({logicalAnd_:JG});function e4(r){let t={x:v(r,"x","logicalNot","bool")};return N.runKernel(En,t)}var Nf=T({logicalNot_:e4});function t4(r,e){let t=v(r,"a","logicalOr","bool"),o=v(e,"b","logicalOr","bool");Je(t.shape,o.shape);let n={a:t,b:o};return N.runKernel(ua,n)}var _f=T({logicalOr_:t4});function r4(r,e){let t=v(r,"a","logicalXor","bool"),o=v(e,"b","logicalXor","bool");return Je(t.shape,o.shape),lu(_f(r,e),Nf(lu(r,e)))}var Wk=T({logicalXor_:r4});var Ef=2147483648;function o4(r,e,t="left"){let o=v(r,"sortedSequence","searchSorted"),n=v(e,"values","searchSorted"),s=o.shape[o.shape.length-1],a=n.shape[n.shape.length-1],i=z(o,[-1,s]),p=z(n,[-1,a]);if(i.rank<2)throw new Error("Sorted input argument must be at least 2-dimensional");if(i.shape[0]!==p.shape[0])throw new 
Error("Leading dimension of 'sortedSequence' and 'values' must match.");if(Ve(p.shape)>=Ef)throw new Error(`values tensor size must less than ${Ef}`);if(i.shape[1]>=Ef)throw new Error(`trailing dim_size must less than ${Ef} for int32 output type, was ${i.shape[1]}`);let u={sortedSequence:i,values:p},c={side:t};return N.runKernel(Ep,u,c)}var dl=T({searchSorted_:o4});function Uk(r,e){return dl(r,e,"left")}function n4(r,e,t,o,n){let s=v(r,"x","maxPool"),a=1,i=s,p=!1;s.rank===3&&(p=!0,i=z(s,[1,s.shape[0],s.shape[1],s.shape[2]])),$(i.rank===4,()=>`Error in maxPool: input must be rank 4 but got rank ${i.rank}.`),$(lr(t,a),()=>`Error in maxPool: Either strides or dilations must be 1. Got strides ${t} and dilations '${a}'`),Ot("maxPool",o,n);let u={x:i},c={filterSize:e,strides:t,pad:o,dimRoundingMode:n},l=N.runKernel(Rn,u,c);return p?z(l,[l.shape[1],l.shape[2],l.shape[3]]):l}var $f=T({maxPool_:n4});function s4(r,e=[1,1,1],t,o,n,s="NDHWC"){let a=v(r,"x","maxPool3d"),i=a,p=!1;a.rank===4&&(p=!0,i=z(a,[1,a.shape[0],a.shape[1],a.shape[2],a.shape[3]])),$(i.rank===5,()=>`Error in maxPool3d: x must be rank 5 but got rank ${i.rank}.`),$(s==="NDHWC",()=>`Error in maxPool3d: Only NDHWC is currently supported, but got dataFormat of ${s}`),Ot("maxPool3d",o,n);let u={x:i},c={filterSize:e,strides:t,pad:o,dimRoundingMode:n,dataFormat:s},l=N.runKernel(Sp,u,c);return p?z(l,[l.shape[1],l.shape[2],l.shape[3],l.shape[4]]):l}var Gk=T({maxPool3d_:s4});function a4(r,e,t,o,n=!1){let a={x:v(r,"x","maxPoolWithArgmax")},i={filterSize:e,strides:t,pad:o,includeBatchInIndex:n},p=N.runKernel(vp,a,i);return{result:p[0],indexes:p[1]}}var Hk=T({maxPoolWithArgmax_:a4});function i4(r,e){let t=v(r,"a","maximum"),o=v(e,"b","maximum");[t,o]=Re(t,o),t.dtype==="bool"&&(t=qe(t,"int32"),o=qe(o,"int32")),Je(t.shape,o.shape);let n={a:t,b:o};return N.runKernel(mo,n)}var Rf=T({maximum_:i4});function u4(r,e=null,t=!1){let n={x:v(r,"x","mean")},s={axis:e,keepDims:t};return N.runKernel(An,n,s)}var mu=T({mean_:u4});function 
Wr(r,e="float32"){if(e==="complex64"){let o=Wr(r,"float32"),n=Wr(r,"float32");return Er(o,n)}let
${n.shape}`);if(s.rank!==1)throw new Error(`Values should be Tensor1D but received shape ${s.shape}`);if(a.rank!==1)throw new Error(`Dense shape should be Tensor1D but received shape ${a.shape}`);if(i.rank!==0)throw new Error(`Default value should be a scalar but received shape ${i.shape}`);let p={indices:n,values:s,denseShape:a,defaultValue:i},u=N.runKernel(Qa,p);return{outputIndices:u[0],outputValues:u[1],emptyRowIndicator:u[2],reverseIndexMap:u[3]}}var VT=T({sparseFillEmptyRows_:Aq});function Fq(r,e,t){let o=v(r,"inputIndices","sparseReshape","int32"),n=v(e,"inputShape","sparseReshape","int32"),s=v(t,"newShape","sparseReshape","int32");if(o.rank!==2)throw new Error(`Input indices should be Tensor2D but received shape
${o.shape}`);if(n.rank!==1)throw new Error(`Input shape should be Tensor1D but received shape ${n.shape}`);if(s.rank!==1)throw new Error(`New shape should be Tensor1D but received shape ${s.shape}`);let a={inputIndices:o,inputShape:n,newShape:s},i=N.runKernel(ga,a);return{outputIndices:i[0],outputShape:i[1]}}var zT=T({sparseReshape_:Fq});function Dq(r,e,t){let o=v(r,"data","sparseSegmentMean"),n=v(e,"indices","sparseSegmentMean","int32"),s=v(t,"segmentIds","sparseSegmentMean","int32");if(o.rank<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.rank!==1)throw new Error(`Indices should be Tensor1D but received shape
${n.shape}`);if(s.rank!==1)throw new Error(`Segment ids should be Tensor1D but received shape
${s.shape}`);let a={data:o,indices:n,segmentIds:s};return N.runKernel(Za,a)}var WT=T({sparseSegmentMean_:Dq});function Pq(r,e,t){let o=v(r,"data","sparseSegmentSum"),n=v(e,"indices","sparseSegmentSum","int32"),s=v(t,"segmentIds","sparseSegmentSum","int32");if(o.rank<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.rank!==1)throw new Error(`Indices should be Tensor1D but received shape
${n.shape}`);if(s.rank!==1)throw new Error(`Segment ids should be Tensor1D but received shape
${s.shape}`);let a={data:o,indices:n,segmentIds:s};return N.runKernel(Ja,a)}var UT=T({sparseSegmentSum_:Pq});function Oq(r,e,t,o,n,s,a,i){let p=v(r,"data","stringNGrams","string");if(p.dtype!=="string")throw new Error("Data must be of datatype string");if(p.shape.length!==1)throw new Error(`Data must be a vector, saw: ${p.shape}`);let u=v(e,"dataSplits","stringNGrams");if(u.dtype!=="int32")throw new Error("Data splits must be of datatype int32");let c={separator:t,nGramWidths:o,leftPad:n,rightPad:s,padWidth:a,preserveShortSequences:i},l={data:p,dataSplits:u},m=N.runKernel(Ns,l,c);return{nGrams:m[0],nGramsSplits:m[1]}}var GT=T({stringNGrams_:Oq});function Mq(r,e,t=!0){let o=v(r,"input","stringSplit","string"),n=v(e,"delimiter","stringSplit","string");if(o.rank!==1)throw new Error(`Input should be Tensor1D but received shape ${o.shape}`);if(n.rank!==0)throw new Error(`Delimiter should be a scalar but received shape ${n.shape}`);let s={skipEmpty:t},a={input:o,delimiter:n},i=N.runKernel(ri,a,s);return{indices:i[0],values:i[1],shape:i[2]}}var HT=T({stringSplit_:Mq});function Lq(r,e){let t=v(r,"input","stringToHashBucketFast","string"),o={numBuckets:e};if(e<=0)throw new Error("Number of buckets must be at least 1");let n={input:t};return N.runKernel(oi,n,o)}var qT=T({stringToHashBucketFast_:Lq});var 
Bq={fft:qp,ifft:hu,rfft:Kp,irfft:Gf},Vq={hammingWindow:cT,hannWindow:Xf,frame:Yf,stft:lT},zq={flipLeftRight:fT,grayscaleToRGB:dT,resizeNearestNeighbor:kT,resizeBilinear:vT,rotateWithOffset:hT,cropAndResize:mT,nonMaxSuppression:gT,nonMaxSuppressionAsync:bT,nonMaxSuppressionWithScore:CT,nonMaxSuppressionWithScoreAsync:IT,nonMaxSuppressionPadded:wT,nonMaxSuppressionPaddedAsync:ST,threshold:TT,transform:NT},Wq={bandPart:_T,gramSchmidt:ET,qr:RT},Uq={absoluteDifference:AT,computeWeightedLoss:sr,cosineDistance:FT,hingeLoss:DT,huberLoss:PT,logLoss:OT,meanSquaredError:MT,sigmoidCrossEntropy:LT,softmaxCrossEntropy:BT},Gq={sparseFillEmptyRows:VT,sparseReshape:zT,sparseSegmentMean:WT,sparseSegmentSum:UT},Hq={stringNGrams:GT,stringSplit:HT,stringToHashBucketFast:qT};var wr=class extends ll{minimize(e,t=!1,o){let{value:n,grads:s}=this.computeGradients(e,o);if(o!=null){let a=o.map(i=>({name:i.name,tensor:s[i.name]}));this.applyGradients(a)}else this.applyGradients(s);return Ft(s),t?n:(n.dispose(),null)}get iterations(){return this.iterations_==null&&(this.iterations_=0),this.iterations_}incrementIterations(){this.iterations_=this.iterations+1}computeGradients(e,t){return dC(e,t)}dispose(){this.iterations_!=null&&Ft(this.iterations_)}async saveIterations(){return this.iterations_==null&&(this.iterations_=0),{name:"iter",tensor:be(this.iterations_,"int32")}}async getWeights(){throw new Error("getWeights() is not implemented for this optimizer yet.")}async setWeights(e){throw new Error(`setWeights() is not implemented for this optimizer class ${this.getClassName()}`)}async extractIterations(e){return this.iterations_=(await e[0].tensor.data())[0],e.slice(1)}};Object.defineProperty(wr,Symbol.hasInstance,{value:r=>r.minimize!=null&&r.computeGradients!=null&&r.applyGradients!=null});var xi=class extends 
wr{constructor(e,t,o=null){super(),this.learningRate=e,this.rho=t,this.epsilon=o,this.accumulatedGrads=[],this.accumulatedUpdates=[],o==null&&(this.epsilon=N.backend.epsilon())}applyGradients(e){(Array.isArray(e)?e.map(o=>o.name):Object.keys(e)).forEach((o,n)=>{let s=N.registeredVariables[o],a=!1;this.accumulatedGrads[n]==null&&(this.accumulatedGrads[n]={originalName:`${o}/accum_grad`,variable:Ne(()=>Gt(s).variable(a))}),this.accumulatedUpdates[n]==null&&(this.accumulatedUpdates[n]={originalName:`${o}/accum_var`,variable:Ne(()=>Gt(s).variable(a))});let i=Array.isArray(e)?e[n].tensor:e[o];if(i==null)return;let p=this.accumulatedGrads[n].variable,u=this.accumulatedUpdates[n].variable;Ne(()=>{let c=ge(oe(p,this.rho),oe(Zt(i),1-this.rho)),l=oe(We(Rr(ge(u,this.epsilon)),Rr(ge(p,this.epsilon))),i),m=ge(oe(u,this.rho),oe(Zt(l),1-this.rho));p.assign(c),u.assign(m);let f=ge(oe(l,-this.learningRate),s);s.assign(f)})}),this.incrementIterations()}dispose(
indices.shape[0] = ${r}`}function RK(r,e){return`indices(${r}, 0) is invalid: ${e} < 0`}function AK(r,e,t){return`indices(${r}, 0) is invalid: ${e} >= ${t}`}function FK(r,e){return`only one output dimension may be -1, not both ${r} and ${e}`}function DK(r,e){return`size ${r} must be non-negative, not ${e}`}function PK(){return"reshape cannot infer the missing input size for an empty tensor unless all specified input sizes are non-zero"}function OK(r,e){let t=Ve(r),o=Ve(e);return`Input to reshape is a SparseTensor with ${t}
dense values, but the requested shape requires a multiple of ${o}. inputShape=${r} outputShape= ${e}`}function MK(r,e){let t=Ve(r),o=Ve(e);return`Input to reshape is a tensor with ${t} dense values, but the requested shape has ${o}. inputShape=${r} outputShape=${e}`}function LK(){return"segment ids must be >= 0"}function BK(){return"segment ids are not increasing"}function VK(r,e){return`Segment id ${r} out of range [0, ${e}), possibly because segmentIds input is not sorted.`}function zK(r,e,t){return`Bad: indices[${r}] == ${e} out of range [0, ${t})`}var NC={};Be(NC,{collectGatherOpShapeInfo:()=>GK,computeOutShape:()=>UK,segOpComputeOptimalWindowSize:()=>WK});function WK(r,e){let t=!1,o;for(r<=ed?(o=r,t=!0):o=sp(r,Math.floor(Math.sqrt(r)));!t;)o>e||o===r?t=!0:o=sp(r,o+1);return o}function UK(r,e,t){let o=[],n=r.length;for(let s=0;s<n;s++)s!==e?o.push(r[s]):o.push(t);return o}function GK(r,e,t,o){let n=e.shape.length,s=r.shape.length;if(o!==0&&(o<-n||o>n))throw new Error(`Expect batchDims in the range of [-${n}, ${n}], but got ${o}`);if(o<0&&(o+=n),o>s)throw new Error(`batchDims (${o}) must be less than rank(x) (
${s}).`);if(t<o)throw new Error(`batchDims (${o}) must be less than or equal to axis (${t}).`);for(let l=0;l<o;++l)if(r.shape[l]!==e.shape[l])throw new Error(`x.shape[${l}]: ${r.shape[l]} should be equal to indices.shape[${l}]: ${e.shape[l]}.`);let a=r.shape[t],i=[],p=1,u=1,c=1;for(let l=0;l<o;++l)i.push(r.shape[l]),p*=r.shape[l];for(let l=o;l<t;l++)i.push(r.shape[l]),u*=r.shape[l];for(let l=o;l<n;l++)i.push(e.shape[l]);for(let l=t+1;l<s;l++)i.push(r.shape[l]),c*=r.shape[l];return{batchSize:p,sliceSize:c,outerSize:u,dimSize:a,outputShape:i}}function HK(r){try{return r.map(e=>Op(e))}catch(e){throw new Error(`Failed to decode encoded string bytes into utf-8, error: ${e}`)}}function qK(r){return r.map(e=>si(e))}var Bt={};Be(Bt,{nonMaxSuppressionV3Impl:()=>Qf,nonMaxSuppressionV4Impl:()=>Zf,nonMaxSuppressionV5Impl:()=>Jf,whereImpl:()=>Kf});var KK=P();KK.registerFlag("KEEP_INTERMEDIATE_TENSORS",()=>!1,r=>{r&&console.warn("Keep intermediate tensors is ON. This will print the values of all intermediate tensors during model inference. Not all models support this mode. For details, check e2e/benchmarks/ model_config.js. 
This significantly impacts performance.")});var To;(function(r){r[r.DT_INVALID=0]="DT_INVALID",r[r.DT_FLOAT=1]="DT_FLOAT",r[r.DT_DOUBLE=2]="DT_DOUBLE",r[r.DT_INT32=3]="DT_INT32",r[r.DT_UINT8=4]="DT_UINT8",r[r.DT_INT16=5]="DT_INT16",r[r.DT_INT8=6]="DT_INT8",r[r.DT_STRING=7]="DT_STRING",r[r.DT_COMPLEX64=8]="DT_COMPLEX64",r[r.DT_INT64=9]="DT_INT64",r[r.DT_BOOL=10]="DT_BOOL",r[r.DT_QINT8=11]="DT_QINT8",r[r.DT_QUINT8=12]="DT_QUINT8",r[r.DT_QINT32=13]="DT_QINT32",r[r.DT_BFLOAT16=14]="DT_BFLOAT16",r[r.DT_QINT16=15]="DT_QINT16",r[r.DT_QUINT16=16]="DT_QUINT16",r[r.DT_UINT16=17]="DT_UINT16",r[r.DT_COMPLEX128=18]="DT_COMPLEX128",r[r.DT_HALF=19]="DT_HALF",r[r.DT_RESOURCE=20]="DT_RESOURCE",r[r.DT_VARIANT=21]="DT_VARIANT",r[r.DT_UINT32=22]="DT_UINT32",r[r.DT_UINT64=23]="DT_UINT64",r[r.DT_FLOAT_REF=101]="DT_FLOAT_REF",r[r.DT_DOUBLE_REF=102]="DT_DOUBLE_REF",r[r.DT_INT32_REF=103]="DT_INT32_REF",r[r.DT_UINT8_REF=104]="DT_UINT8_REF",r[r.DT_INT16_REF=105]="DT_INT16_REF",r[r.DT_INT8_REF=106]="DT_INT8_REF",r[r.DT_STRING_REF=107]="DT_STRING_REF",r[r.DT_COMPLEX64_REF=108]="DT_COMPLEX64_REF",r[r.DT_INT64_REF=109]="DT_INT64_REF",r[r.DT_BOOL_REF=110]="DT_BOOL_REF",r[r.DT_QINT8_REF=111]="DT_QINT8_REF",r[r.DT_QUINT8_REF=112]="DT_QUINT8_REF",r[r.DT_QINT32_REF=113]="DT_QINT32_REF",r[r.DT_BFLOAT16_REF=114]="DT_BFLOAT16_REF",r[r.DT_QINT16_REF=115]="DT_QINT16_REF",r[r.DT_QUINT16_REF=116]="DT_QUINT16_REF",r[r.DT_UINT16_REF=117]="DT_UINT16_REF",r[r.DT_COMPLEX128_REF=118]="DT_COMPLEX128_REF",r[r.DT_HALF_REF=119]="DT_HALF_REF",r[r.DT_RESOURCE_REF=120]="DT_RESOURCE_REF",r[r.DT_VARIANT_REF=121]="DT_VARIANT_REF",r[r.DT_UINT32_REF=122]="DT_UINT32_REF",r[r.DT_UINT64_REF=123]="DT_UINT64_REF"})(To||(To={}));var XT;(function(r){let e;(function(t){t[t.LEGACY=0]="LEGACY",t[t.V1=1]="V1",t[t.V2=2]="V2"})(e=r.CheckpointFormatVersion||(r.CheckpointFormatVersion={}))})(XT||(XT={}));var EC={};function XK(r,e){let t={tfOpName:r,category:"custom",inputs:[],attrs:[],customExecutor:e};EC[r]=t}function td(r){return 
EC[r]}function YK(r){delete EC[r]}function S(r,e,t,o,n){let s=e.inputParams[r];if(s&&s.inputIndexStart!==void 0){let i=s.inputIndexStart,p=s.inputIndexEnd===0?void 0:s.inputIndexEnd===void 0?i+1:s.inputIndexEnd;if(s.type==="tensor")return Ht(e.inputNames[s.inputIndexStart],t,o,n);if(s.type==="tensors")return e.inputNames.slice(i,p).map(m=>Ht(m,t,o,n));let u=Ht(e.inputNames.slice(i)[0],t,o,n),c=u.dataSync();return s.type==="number"?c[0]:x.toNestedArray(u.shape,c)}let a=e.attrParams[r];return a&&a.value}function Ht(r,e,t,o){let[n,s]=Sr(r);if(o!=null){let i=o.getHashTableHandleByName(n);if(i!=null)return i}let a=t.currentContextIds.find(i=>!!e[rd(n,i)]);return a!==void 0?e[rd(n,a)][s]:void 0}function YT(r,e,t){return e[rd(r,t.currentContextId)]}function zo(r,e){let[t,o,n]=Sr(r);return[rd(t,e&&e.currentContextId),o,n]}function rd(r,e){return e?`${r}-${e}`:r}function Sr(r){let e=r.split(":");if(e.length===1)return[r,0,void 0];let t=e[0],o=e.length===3?e[1]:void
because the value dtype is ${t.dtype}, but TensorArray dtype is ${this.dtype}.`);if(this.size()===0&&(this.elementShape==null||this.elementShape.length===0)&&(this.elementShape=t.shape),Ur(this.elementShape,t.shape,`TensorArray ${this.name}: Could not write to TensorArray index ${e}.`),o.read)throw new Error(`TensorArray ${this.name}: Could not write to TensorArray index ${e}, because it has already been read.`);if(o.written)throw new Error(`TensorArray ${this.name}: Could not write to TensorArray index ${e}, because it has already been written.`);o.tensor=t,So(t),o.written=!0,this.tensors[e]=o}writeMany(e,t){if(e.length!==t.length)throw new Error(`TensorArray ${this.name}: could not write multiple tensors,because the index size: ${e.length} is not the same as tensors size: ${t.length}.`);e.forEach((o,n)=>this.write(o,t[n]))}gather(e,t){if(!!t&&t!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but gather requested dtype ${t}`);if(e)e=e.slice(0,this.size());else{e=[];for(let n=0;n<this.size();n++)e.push(n)}if(e.length===0)return nr([],[0].concat(this.elementShape));let o=this.readMany(e);return Ur(this.elementShape,o[0].shape,"TensorArray shape mismatch: "),Ir(o,0)}concat(e){if(!!e&&e!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but concat requested dtype ${e}`);if(this.size()===0)return nr([],[0].concat(this.elementShape));let t=[];for(let n=0;n<this.size();n++)t.push(n);let o=this.readMany(t);return Ur(this.elementShape,o[0].shape,`TensorArray shape mismatch: tensor array shape (${this.elementShape}) vs first tensor shape (${o[0].shape})`),gt(o,0)}scatter(e,t){if(t.dtype!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but tensor has dtype ${t.dtype}`);if(e.length!==t.shape[0])throw new Error(`Expected len(indices) == tensor.shape[0], but saw: ${e.length} vs. ${t.shape[0]}`);let o=Math.max(...e);if(!this.dynamicSize&&o>=this.maxSize)throw new Error(`Max index must be < array size (${o} vs. 
${this.maxSize})`);this.writeMany(e,ko(t,0))}split(e,t){if(t.dtype!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but tensor has dtype ${t.dtype}`);let o=0,n=e.map(p=>(o+=p,o));if(o!==t.shape[0])throw new Error(`Expected sum of lengths to be equal to
tensor.shape[0], but sum of lengths is
${o}, and tensor's shape is: ${t.shape}`);if(!this.dynamicSize&&e.length!==this.maxSize)throw new Error(`TensorArray's size is not equal to the size of lengths (${this.maxSize} vs. ${e.length}), and the TensorArray is not marked as dynamically resizeable`);let s=o===0?0:t.size/o,a=[];Ne(()=>{t=z(t,[1,o,s]);for(let p=0;p<e.length;++p){let c=[0,p===0?0:n[p-1],0],l=[1,e[p],s];a[p]=z(Ue(t,c,l),this.elementShape)}return a});let i=[];for(let p=0;p<e.length;p++)i[p]=p;this.writeMany(i,a)}};var Ra=class{constructor(e,t,o,n=-1){this.tensors=e,this.elementShape=t,this.elementDtype=o,e!=null&&e.forEach(s=>{if(o!==s.dtype)throw new Error(`Invalid data types; op elements ${o}, but list elements ${s.dtype}`);Ur(t,s.shape,"TensorList shape mismatch: "),So(s)}),this.idTensor=be(0),this.maxNumElements=n,So(this.idTensor)}get id(){return this.idTensor.id}copy(){return new Ra([...this.tensors],this.elementShape,this.elementDtype)}clearAndClose(e){this.tensors.forEach(t=>{(e==null||!e.has(t.id))&&t.dispose()}),this.tensors.length=0,this.idTensor.dispose()}size(){return this.tensors.length}stack(e,t,o=-1){if(t!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t}, but list elements ${this.elementDtype}`);if(o!==-1&&this.tensors.length!==o)throw new Error(`Operation expected a list with ${o} elements but got a list with ${this.tensors.length} elements.`);Ur(e,this.elementShape,"TensorList shape mismatch: ");let n=Xp(this.elementShape,this.tensors,e);return Ne(()=>{let s=this.tensors.map(a=>z(a,n));return Ir(s,0)})}popBack(e,t){if(t!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t}, but list elements ${this.elementDtype}`);if(this.size()===0)throw new Error("Trying to pop from an empty list.");let o=Xp(this.elementShape,this.tensors,e),n=this.tensors.pop();return n.kept=!1,Ur(n.shape,e,"TensorList shape mismatch: "),z(n,o)}pushBack(e){if(e.dtype!==this.elementDtype)throw new Error(`Invalid data types; op elements ${e.dtype}, but list elements 
${this.elementDtype}`);if(Ur(e.shape,this.elementShape,"TensorList shape mismatch: "),this.maxNumElements===this.size())throw new Error("Trying to push element into a full list.");So(e),this.tensors.push(e)}resize(e){if(e<0)throw new Error(`TensorListResize expects size to be non-negative. Got: ${e}`);if(this.maxNumElements!==-1&&e>this.maxNumElements)throw new Error(`TensorListResize input size ${e} is greater maxNumElement ${this.maxNumElements}.`);let t=new Ra([],this.elementShape,this.elementDtype,this.maxNumElements);t.tensors.length=e;for(let o=0;o<Math.min(this.tensors.length,e);++o)t.tensors[o]=this.tensors[o];return t}getItem(e,t,o){if(o!==this.elementDtype)throw new Error(`Invalid data types; op elements ${o}, but list elements ${this.elementDtype}`);if(e<0||e>this.tensors.length)throw new Error(`Trying to access element ${e} in a list with ${this.tensors.length} elements.`);if(this.tensors[e]==null)throw new Error(`element at index ${e} is null.`);Ur(this.tensors[e].shape,t,"TensorList shape mismatch: ");let n=Xp(this.elementShape,this.tensors,t);return z(this.tensors[e],n)}setItem(e,t){if(t.dtype!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t.dtype}, but list elements ${this.elementDtype}`);if(e<0||this.maxNumElements!==-1&&e>=this.maxNumElements)throw new Error(`Trying to set element ${e} in a list with max ${this.maxNumElements} elements.`);Ur(this.elementShape,t.shape,"TensorList shape mismatch: "),So(t),this.tensors[e]!=null&&(this.tensors[e].kept=!1),this.tensors[e]=t}gather(e,t,o){if(t!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t}, but list elements ${this.elementDtype}`);Ur(this.elementShape,o,"TensorList shape mismatch: "),e=e.slice(0,this.size());let n=Xp(this.elementShape,this.tensors,o);return e.length===0?nr([],[0].concat(n)):Ne(()=>{let s=e.map(a=>z(this.tensors[a],n));return Ir(s,0)})}concat(e,t){if(!!e&&e!==this.elementDtype)throw new Error(`TensorList dtype is ${this.elementDtype} but 
concat requested dtype ${e}`);Ur(this.elementShape,t,"TensorList shape mismatch: ");let o
tensor.shape[0], but sum of lengths is
${o}, and tensor's shape is: ${r.shape}`);let s=r.shape.slice(1),a=dd(s,t),i=o===0?0:r.size/o,p=Ne(()=>{let c=[];r=z(r,[1,o,i]);for(let l=0;l<e.length;++l){let f=[0,l===0?0:n[l-1],0],d=[1,e[l],i];c[l]=z(Ue(r,f,d),a)}return r.dispose(),c}),u=new Ra([],t,r.dtype,e.length);for(let c=0;c<p.length;c++)u.setItem(c,p[c]);return u}var iN=async(r,e,t)=>{switch(r.op){case"If":case"StatelessIf":{let o=S("thenBranch",r,e,t),n=S("elseBranch",r,e,t),s=S("cond",r,e,t),a=S("args",r,e,t);return(await s.data())[0]?t.functionMap[o].executeFunctionAsync(a,t.tensorArrayMap,t.tensorListMap):t.functionMap[n].executeFunctionAsync(a,t.tensorArrayMap,t.tensorListMap)}case"While":case"StatelessWhile":{let o=S("body",r,e,t),n=S("cond",r,e,t),s=S("args",r,e,t),a=await t.functionMap[n].executeFunctionAsync(s,t.tensorArrayMap,t.tensorListMap),i=s.map(c=>c.id),p=await a[0].data();a.forEach(c=>{!c.kept&&i.indexOf(c.id)===-1&&c.dispose()});let u=s;for(;p[0];){let c=u;u=await t.functionMap[o].executeFunctionAsync(u,t.tensorArrayMap,t.tensorListMap);let l=u.map(f=>f.id);c.forEach(f=>{!f.kept&&i.indexOf(f.id)===-1&&l.indexOf(f.id)===-1&&f.dispose()});let m=await t.functionMap[n].executeFunctionAsync(u,t.tensorArrayMap,t.tensorListMap);p=await m[0].data(),m.forEach(f=>{!f.kept&&i.indexOf(f.id)===-1&&l.indexOf(f.id)===-1&&f.dispose()})}return u}case"LoopCond":{let o=S("pred",r,e,t);return[ss(o)]}case"Switch":{let o=S("pred",r,e,t),n=S("data",r,e,t);return n.kept||(n=ss(n)),(await o.data())[0]?[void 0,n]:[n,void 0]}case"Merge":{let o=r.inputNames.find(n=>Ht(n,e,t)!==void 0);if(o){let n=Ht(o,e,t);return[ss(n)]}return}case"Enter":{let o=S("frameName",r,e,t),n=S("tensor",r,e,t);return t.enterFrame(o),[ss(n)]}case"Exit":{let o=S("tensor",r,e,t);return t.exitFrame(),[ss(o)]}case"NextIteration":{let o=S("tensor",r,e,t);return t.nextIteration(),[ss(o)]}case"TensorArrayV3":{let 
o=S("size",r,e,t),n=S("dtype",r,e,t),s=S("elementShape",r,e,t),a=S("dynamicSize",r,e,t),i=S("clearAfterRead",r,e,t),p=S("identicalElementShapes",r,e,t),u=S("name",r,e,t),c=new hd(u,n,o,s,p,a,i);return t.addTensorArray(c),[c.idTensor,be(1)]}case"TensorArrayWriteV3":{let o=S("tensorArrayId",r,e,t),n=S("index",r,e,t),s=S("tensor",r,e,t),a=t.getTensorArray(o.id);return a.write(n,s),[a.idTensor]}case"TensorArrayReadV3":{let o=S("tensorArrayId",r,e,t),n=S("index",r,e,t);return[t.getTensorArray(o.id).read(n)]}case"TensorArrayGatherV3":{let o=S("tensorArrayId",r,e,t),n=S("indices",r,e,t),s=S("dtype",r,e,t);return[t.getTensorArray(o.id).gather(n,s)]}case"TensorArrayScatterV3":{let o=S("tensorArrayId",r,e,t),n=S("indices",r,e,t),s=S("tensor",r,e,t),a=t.getTensorArray(o.id);return a.scatter(n,s),[a.idTensor]}case"TensorArrayConcatV3":{let o=S("tensorArrayId",r,e,t),n=t.getTensorArray(o.id),s=S("dtype",r,e,t);return[n.concat(s)]}case"TensorArraySplitV3":{let o=S("tensorArrayId",r,e,t),n=S("tensor",r,e,t),s=S("lengths",r,e,t),a=t.getTensorArray(o.id);return a.split(s,n),[a.idTensor]}case"TensorArraySizeV3":{let o=S("tensorArrayId",r,e,t),n=t.getTensorArray(o.id);return[be(n.size(),"int32")]}case"TensorArrayCloseV3":{let o=S("tensorArrayId",r,e,t),n=t.getTensorArray(o.id);return n.clearAndClose(),[n.idTensor]}case"TensorListSetItem":{let o=S("tensorListId",r,e,t),n=S("index",r,e,t),s=S("tensor",r,e,t),a=t.getTensorList(o.id);return a.setItem(n,s),[a.idTensor]}case"TensorListGetItem":{let o=S("tensorListId",r,e,t),n=S("index",r,e,t),s=S("elementShape",r,e,t),a=S("elementDType",r,e,t);return[t.getTensorList(o.id).getItem(n,s,a)]}case"TensorListScatterV2":case"TensorListScatter":{let o=S("indices",r,e,t),n=S("tensor",r,e,t),s=S("elementShape",r,e,t),a=S("numElements",r,e,t),i=sN(n,o,s,a);return t.addTensorList(i),[i.idTensor]}case"TensorListReserve":case"EmptyTensorList":{let 
o=S("elementShape",r,e,t),n=S("elementDType",r,e,t),s;r.op==="TensorListReserve"?s="numElements":s="maxNumElements";let a=S(s,r,e,t),i=r.op==="TensorListReserve"?-1:a,p=nN(o,n,a,i);return t.addTensorList(p),[p.idTensor]}case"TensorListGather":{let o=S("tensorListId",r,e,t),n=S("indices",r,e,t),s=S("elementShape",r,e,t),a=S("elemen
============================
Hi, looks like you are running TensorFlow.js in Node.js. To speed things up dramatically, install our node backend, visit https://github.com/tensorflow/tfjs-node for more details.
============================`));let n={id:this.nextDataId()};return this.data.set(n,{values:e,dtype:o,refCount:1}),n}makeTensorInfo(e,t,o){let n;if(t==="string"&&o!=null&&o.length>0&&x.isString(o[0])){let s=o.map(a=>x.encodeString(a));n=this.write(s,e,t)}else n=this.write(o,e,t);return{dataId:n,shape:e,dtype:t}}refCount(e){return this.data.has(e)?this.data.get(e).refCount:0}incRef(e){let t=this.data.get(e);t.refCount++}decRef(e){if(this.data.has(e)){let t=this.data.get(e);t.refCount--}}move(e,t,o,n,s){this.data.set(e,{values:t,dtype:n,refCount:s})}numDataIds(){return this.data.numDataIds()}async read(e){return this.readSync(e)}readSync(e){let{dtype:t,complexTensorInfos:o}=this.data.get(e);if(t==="complex64"){let n=this.readSync(o.real.dataId),s=this.readSync(o.imag.dataId);return I.mergeRealAndImagArrays(n,s)}return this.data.get(e).values}bufferSync(e){let t=this.readSync(e.dataId);if(e.dtype==="string")try{let o=t.map(n=>x.decodeString(n));return ne(e.shape,e.dtype,o)}catch(o){throw new Error("Failed to decode encoded string bytes into utf-8")}return ne(e.shape,e.dtype,t)}makeOutput(e,t,o){return cr().makeTensorFromTensorInfo(this.makeTensorInfo(t,o,e),this)}disposeData(e,t=!1){if(this.data.has(e)){if(this.data.get(e).refCount--,!t&&this.data.get(e).refCount>0)return!1;let{complexTensorInfos:o}=this.data.get(e);o!=null&&(this.disposeData(o.real.dataId,!0),this.disposeData(o.imag.dataId,!0)),this.data.delete(e)}return!0}disposeIntermediateTensorInfo(e){this.disposeData(e.dataId)}async time(e){let t=x.now();return e(),{kernelMs:x.now()-t}}memory(){return{unreliable:!0,reasons:["The reported memory is an upper bound. 
Due to automatic garbage collection, the true allocated memory may be less."]}}where(e){K([e],"where");let t=this.readSync(e.dataId);return K6(e.shape,t)}dispose(){}floatPrecision(){return 32}epsilon(){return super.epsilon()}};Si.nextDataId=0;var Ad={};Be(Ad,{addImpl:()=>rI,bincountImpl:()=>Zp,bincountReduceImpl:()=>yd,castImpl:()=>tI,ceilImpl:()=>oI,concatImpl:()=>Iu,equalImpl:()=>nI,expImpl:()=>aI,expm1Impl:()=>uI,floorImpl:()=>pI,gatherNdImpl:()=>bd,gatherV2Impl:()=>Cd,greaterEqualImpl:()=>lI,greaterImpl:()=>cI,lessEqualImpl:()=>fI,lessImpl:()=>mI,linSpaceImpl:()=>Id,logImpl:()=>dI,maxImpl:()=>wd,maximumImpl:()=>hI,minimumImpl:()=>gI,multiplyImpl:()=>Cl,negImpl:()=>xI,notEqualImpl:()=>yI,prodImpl:()=>bI,raggedGatherImpl:()=>Sd,raggedRangeImpl:()=>vd,raggedTensorToTensorImpl:()=>kd,rangeImpl:()=>Su,rsqrtImpl:()=>CI,scatterImpl:()=>Aa,sigmoidImpl:()=>e2,simpleAbsImpl:()=>eI,sliceImpl:()=>vu,sparseFillEmptyRowsImpl:()=>Td,sparseReshapeImpl:()=>Nd,sparseSegmentReductionImpl:()=>tc,sqrtImpl:()=>o2,squaredDifferenceImpl:()=>wI,stridedSliceImpl:()=>_d,stringNGramsImpl:()=>ku,stringSplitImpl:()=>Tu,stringToHashBucketFastImpl:()=>Nu,subImpl:()=>vI,tileImpl:()=>Ed,topKImpl:()=>$d,transposeImpl:()=>Jp,uniqueImpl:()=>Rd});function eI(r){let e=new Float32Array(r.length);for(let t=0;t<r.length;++t)e[t]=Math.abs(r[t]);return e}var j6=r=>{let{x:e}=r.inputs,t=r.backend;K(e,"abs");let o=new Float32Array(x.sizeFromShape(e.shape)),n=t.data.get(e.dataId).values;return o=eI(n),t.makeOutput(o,e.shape,e.dtype)},TN={kernelName:sn,backendName:"cpu",kernelFunc:j6};function Le(r){return(e,t,o,n,s)=>{let a=I.assertAndGetBroadcastShape(e,t),i=a.length,p=x.computeStrides(a),u=x.sizeFromShape(a),c=x.getTypedArrayFromDType(s,u),l=e.length,m=t.length,f=x.computeStrides(e),d=x.computeStrides(t),h=I.getBroadcastDims(e,a),g=I.getBroadcastDims(t,a);if(h.length+g.length===0)for(let y=0;y<c.length;++y)c[y]=r(o[y%o.length],n[y%n.length]);else for(let y=0;y<c.length;++y){let 
b=x.indexToLoc(y,i,p),C=b.slice(-l);h.forEach(E=>C[E]=0);let w=x.locToIndex(C,l,f),k=b.slice(-m);g.forEach(E=>k[E]=0);let _=x.locToIndex(k,m,d);c[y]=r(o[w],n[_])}return[c,a]}}function qt(r){let{inputs:e,backend:t}=r,{real:o,imag:n}=e,s=t.data.get(o.dataId).values,a=t.data.get(n.dataId).values,i=t.makeTensorInfo(o.shape,"complex64"),p=t.data.get(i.dataId);return p.complexTensorInfos={real:t.makeTensorInfo(o.shape,"float32",s),imag:t.makeTensorInfo(n.shape,"float32",a)},i}var N
${s.shape}`);if(o.shape.length!==2)throw new Error(`Indices must be a matrix, saw:
${o.shape}`);if(n.shape.length!==1)throw new Error(`Values must be a vector, saw:
${n.shape}`);if(a.shape.length!==0)throw new Error(`Default value must be a scalar, saw:
${a.shape}`);let i=t.data.get(o.dataId).values,p=t.data.get(n.dataId).values,u=t.data.get(s.dataId).values,c=t.data.get(a.dataId).values[0],[l,m,f,d,h]=Td(i,o.shape,o.dtype,p,n.dtype,u,c);return[t.makeTensorInfo(m,o.dtype,l),t.makeTensorInfo([m[0]],n.dtype,f),t.makeTensorInfo([d.length],"bool",new Uint8Array(d.map(g=>Number(g)))),t.makeTensorInfo([h.length],o.dtype,new Int32Array(h))]}/* Kernel registration record: binds kernel name constant Qa to F5 on the "cpu" backend. */var SE={kernelName:Qa,backendName:"cpu",kernelFunc:F5};/** Sparse-reshape kernel function for the cpu backend. Requires inputIndices to be rank 2 and inputShape/newShape to be rank 1 (throws Error otherwise), then calls Nd (exported elsewhere in this file as sparseReshapeImpl) and returns two tensor infos: the new indices (dtype of inputIndices) and the output shape as an Int32Array (dtype of newShape). */function D5(r){let{inputs:e,backend:t}=r,{inputIndices:o,inputShape:n,newShape:s}=e;if(o.shape.length!==2)throw new Error(`Input indices should be a matrix but received shape
${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=Array.from(t.data.get(n.dataId).values),i=t.data.get(o.dataId).values,p=Array.from(t.data.get(s.dataId).values),[u,c,l]=Nd(i,o.shape,o.dtype,a,p);return[t.makeTensorInfo(c,o.dtype,u),t.makeTensorInfo([l.length],s.dtype,new Int32Array(l))]}/* Kernel registration record: binds kernel name constant ga to D5 on the "cpu" backend. */var vE={kernelName:ga,backendName:"cpu",kernelFunc:D5};/** Sparse segment reduction kernel function for the cpu backend that passes `true` as the last argument of tc (exported elsewhere in this file as sparseSegmentReductionImpl; the flag presumably selects the mean variant - confirm against tc). Requires data of rank >= 1, rank-1 indices and segmentIds of equal length; throws Error otherwise. Returns one tensor info with the dtype of `data`. */function P5(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);if(n.shape[0]!==s.shape[0])throw new Error("segmentIds and indices should have same size.");let a=t.data.get(o.dataId).values,i=t.data.get(n.dataId).values,p=t.data.get(s.dataId).values,[u,c]=tc(a,o.shape,o.dtype,i,p,!0);return t.makeTensorInfo(c,o.dtype,u)}/* Kernel registration record: binds kernel name constant Za to P5 on the "cpu" backend. */var kE={kernelName:Za,backendName:"cpu",kernelFunc:P5};/** Sparse segment reduction kernel function for the cpu backend that calls tc (sparseSegmentReductionImpl) without the trailing flag, so tc's default applies. Performs the same validation as P5: data of rank >= 1, rank-1 indices and segmentIds of equal length; throws Error otherwise. Returns one tensor info with the dtype of `data`. */function O5(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);if(n.shape[0]!==s.shape[0])throw new Error("segmentIds and indices should have same size.");let a=t.data.get(o.dataId).values,i=t.data.get(n.dataId).values,p=t.data.get(s.dataId).values,[u,c]=tc(a,o.shape,o.dtype,i,p);return t.makeTensorInfo(c,o.dtype,u)}var TE={kernelName:Ja,backendName:"cpu",kernelFunc:O5};function M5(r){let{inputs:e,backend:t,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=e,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=I.calculateShapes(s,n,i),f=!1,d=t.bufferSync(n),h;switch(s.dtype){case"bool":{let g=t.bufferSync(s),y=Boolean(t.data.get(a.dataId).values[0]);h=Aa(d,g,i,m,c,u,p,l,y,f);break}case"float32":{let g=t.bufferSync(s),y=t.data.get(a.dataId).values[0];h=Aa(d,g,i,m,c,u,p,l,y,f);break}case"int32":{let g=t.bufferSync(s),y=t.data.get(a.dataId).values[0];h=Aa(d,g,i,m,c,u,p,l,y,f);break}case"string":{let g=t.bufferSync(s),y=x.decodeString(t.data.get(a.dataId).values[0]);h=Aa(d,g,i,m,c,u,p,l,y,f);break}default:throw new Error(`Unsupported type ${s.dtype}`)}return t.makeTensorInfo(i,h.dtype,h.values)}var NE={kernelName:ei,backendName:"cpu",kernelFunc:M5};function L5(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=o,i=x.parseAxisParam(a,n.shape)[0],p=I.prepareSplitSize(n,s,i),u=new Array(n.shape.length).fill(0),c=n.shape.slice();return p.map(l=>{let m=[...c];m[i]=l;let f=qo({inputs:{x:n},backend:t,attrs:{begin:u,size:m}});return u[i]+=l,f})}var _E={kernelName:Ts,backendName:"cpu",kernelFunc:L5};var EE={kernelName:ti,backendName:"cpu",kernelFunc:({inputs:r,backend:e})=>{let{x:t}=r,o=e;K(t,"square");let n=o.data.get(t.dataId).values,s=new Float32Array(n.length);for(let i=0;i<n.length;++i){let p=n[i];s[i]=p*p}return{dataId:o.write(s,t.shape,t.dtype),shape:t.shape,dtype:t.dtype}}};var B5=we($s,(r,e)=>{let t=e;return isNaN(r)?NaN:r>0?1:t.alpha}),$E={kernelName:$s,backendName:"cpu",kernelFunc:B5};function 
/** Strided-slice kernel function for the cpu backend. Resolves the slice with et.sliceInfo; an identity slice becomes a reshape (Oe) to the final shape; a dim-0 or simple slice becomes slice (qo) + reshape, disposing the intermediate; anything else goes through _d (exported elsewhere in this file as stridedSliceImpl) on a buffer of the input. */V5(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o;K(n,"stridedSlice");let{finalShapeSparse:f,finalShape:d,isIdentity:h,sliceDim0:g,isSimpleSlice:y,begin:b,end:C,strides:w}=et.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=Oe({inputs:{x:n},backend:t,attrs:{shape:d}});else if(g||y){x.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let _=et.computeOutShape(b,C,w),E=qo({inputs:{x:n},backend:t,attrs:{begin:b,size:_}});k=Oe({inputs:{x:E},backend:t,attrs:{shape:d}}),t.disposeIntermediateTensorInfo(E)}else{let _=t.bufferSync(n),E=_d(f,_,w,b);k=t.makeTensorInfo(d,E.dtype,E.values)}return k}/* Kernel registration record: binds kernel name constant Yn to V5 on the "cpu" backend. */var RE={kernelName:Yn,backendName:"cpu",kernelFunc:V5};/** String n-grams kernel function for the cpu backend: passes the stored values of `data` and `dataSplits` plus the attrs to ku (exported elsewhere in this file as stringNGramsImpl) and returns [string tensor info of the n-grams, int32 tensor info shaped like dataSplits]. */function z5(r){let{inputs:e,backend:t,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=e,m=t.data.get(c.dataId).values,f=t.data.get(l.dataId).values,[d,h]=ku(m,f,n,s,a,i,p,u);return[t.makeTensorInfo([d.length],"string",d),t.makeTensorInfo(l.shape,"int32",h)]}/* Kernel registration record: binds kernel name constant Ns to z5 on the "cpu" backend. */var AE={kernelName:Ns,backendName:"cpu",kernelFunc:z5};/** String-split kernel function for the cpu backend. Requires a rank-1 string input and a scalar delimiter (throws Error otherwise), calls Tu (exported elsewhere in this file as stringSplitImpl) and returns three tensor infos: int32 indices of shape [m,2], string values of shape [m], and an int32 shape vector of length 2, where m is the number of values. */function W5(r){let{inputs:e,backend:t,attrs:o}=r,{skipEmpty:n}=o,{input:s,delimiter:a}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(s.shape.length!==1)throw new Error(`Input must be a vector, got shape: ${s.shape}`);if(a.shape.length!==0)throw new Error(`Delimiter must be a scalar, got shape: ${a.shape}`);let i=t.data.get(s.dataId).values,p=t.data.get(a.dataId).values[0],[u,c,l]=Tu(i,p,n),m=c.length;return[t.makeTensorInfo([m,2],"int32",u),t.makeTensorInfo([m],"string",c),t.makeTensorInfo([2],"int32",new Int32Array(l))]}/* Kernel registration record: binds kernel name constant ri to W5 on the "cpu" backend. */var FE={kernelName:ri,backendName:"cpu",kernelFunc:W5};/** String-to-hash-bucket kernel function for the cpu backend. Requires a string input and numBuckets > 0 (throws Error otherwise), then calls Nu (exported elsewhere in this file as stringToHashBucketFastImpl) and returns an int32 tensor info with the input's shape. */function U5(r){let{inputs:e,backend:t,attrs:o}=r,{numBuckets:n}=o,{input:s}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(n<=0)throw new Error("Number of buckets must be at least 1");let 
a=t.data.get(s.dataId).values,i=Nu(a,n);return t.makeTensorInfo(s.shape,"int32",i)}/* Kernel registration record: binds kernel name constant oi to U5 on the "cpu" backend. */var DE={kernelName:oi,backendName:"cpu",kernelFunc:U5};var G5=we(xa,r=>Math.t
`),s=n.length.toString().length+2,a=n.map((l,m)=>x.rightPad((m+1).toString(),s)+l),i=0;for(let l=0;l<a.length;l++)i=Math.max(a[l].length,i);let p=a.slice(0,o-1),u=a.slice(o-1,o),c=a.slice(o);console.log(p.join(`
`)),console.log(e.split(`
`)[0]),console.log(`%c ${x.rightPad(u[0],i)}`,"border:1px solid red; background-color:#e3d2d2; color:#a61717"),console.log(c.join(`
`))}function UI(r){return Da(r,()=>r.createProgram(),"Unable to create WebGLProgram.")}function GI(r,e){if(me(r,()=>r.linkProgram(e)),!P().get("ENGINE_COMPILE_ONLY")&&r.getProgramParameter(e,r.LINK_STATUS)===!1)throw console.log(r.getProgramInfoLog(e)),new Error("Failed to link vertex and fragment shaders.")}function $l(r,e){if(me(r,()=>r.validateProgram(e)),r.getProgramParameter(e,r.VALIDATE_STATUS)===!1)throw console.log(r.getProgramInfoLog(e)),new Error("Shader program validation failed.")}function HI(r,e){let t=Da(r,()=>r.createBuffer(),"Unable to create WebGLBuffer");return me(r,()=>r.bindBuffer(r.ARRAY_BUFFER,t)),me(r,()=>r.bufferData(r.ARRAY_BUFFER,e,r.STATIC_DRAW)),t}function qI(r,e){let t=Da(r,()=>r.createBuffer(),"Unable to create WebGLBuffer");return me(r,()=>r.bindBuffer(r.ELEMENT_ARRAY_BUFFER,t)),me(r,()=>r.bufferData(r.ELEMENT_ARRAY_BUFFER,e,r.STATIC_DRAW)),t}function l8(){return P().getNumber("WEBGL_VERSION")===2?1:4}function KI(r){return Da(r,()=>r.createTexture(),"Unable to create WebGLTexture.")}function jI(r,e){let t=P().getNumber("WEBGL_MAX_TEXTURE_SIZE");if(r<=0||e<=0){let o=`[${r}x${e}]`;throw new Error("Requested texture size "+o+" is invalid.")}if(r>t||e>t){let o=`[${r}x${e}]`,n=`[${t}x${t}]`;throw new Error("Requested texture size "+o+" greater than WebGL maximum on this browser / GPU "+n+".")}}function XI(r){return Da(r,()=>r.createFramebuffer(),"Unable to create WebGLFramebuffer.")}function Wd(r,e,t,o,n,s,a){let i=r.getAttribLocation(e,t);return i===-1?!1:(me(r,()=>r.bindBuffer(r.ARRAY_BUFFER,o)),me(r,()=>r.vertexAttribPointer(i,n,r.FLOAT,!1,s,a)),me(r,()=>r.enableVertexAttribArray(i)),!0)}function KE(r,e,t){XE(r,t),me(r,()=>r.activeTexture(r.TEXTURE0+t)),me(r,()=>r.bindTexture(r.TEXTURE_2D,e))}function m8(r,e){XE(r,e),me(r,()=>r.activeTexture(r.TEXTURE0+e)),me(r,()=>r.bindTexture(r.TEXTURE_2D,null))}function YI(r,e,t){return Da(r,()=>r.getUniformLocation(e,t),'uniform "'+t+'" not present in program.')}function QI(r,e,t){return 
r.getUniformLocation(e,t)}function ZI(r,e,t,o){me(r,()=>KE(r,e,o)),me(r,()=>r.uniform1i(t,o))}function f8(r){me(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,null)),me(r,()=>r.viewport(0,0,r.canvas.width,r.canvas.height)),me(r,()=>r.scissor(0,0,r.canvas.width,r.canvas.height))}function Rl(r,e,t){me(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,t)),me(r,()=>r.framebufferTexture2D(r.FRAMEBUFFER,r.COLOR_ATTACHMENT0,r.TEXTURE_2D,e,0))}function Ud(r,e){me(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,e)),me(r,()=>r.framebufferTexture2D(r.FRAMEBUFFER,r.COLOR_ATTACHMENT0,r.TEXTURE_2D,null,0))}function sc(r){let e=r.checkFramebufferStatus(r.FRAMEBUFFER);if(e!==r.FRAMEBUFFER_COMPLETE)throw new Error("Error binding framebuffer: "+jE(r,e))}function jE(r,e){switch(e){case r.FRAMEBUFFER_INCOMPLETE_ATTACHMENT:return"FRAMEBUFFER_INCOMPLETE_ATTACHMENT";case r.FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:return"FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT";case r.FRAMEBUFFER_INCOMPLETE_DIMENSIONS:return"FRAMEBUFFER_INCOMPLETE_DIMENSIONS";case r.FRAMEBUFFER_UNSUPPORTED:return"FRAMEBUFFER_UNSUPPORTED";default:return`unknown error ${e}`}}function Da(r,e,t){let o=me(r,()=>e());if(o==null)throw new Error(t);return o}function XE(r,e){let t=r.MAX_COMBINED_TEXTURE_IMAGE_UNITS-1,o=e+r.TEXTURE0;if(o<r.TEXTURE0||o>t){let n=`[gl.TEXTURE0, gl.TEXTURE${t}]`;throw new Error(`textureUnit must be in ${n}.`)}}function Pa(r,e=2){return x.sizeFromShape(r.slice(0,r.length-e))}function Oa(r){if(r.length===0)throw Error("Cannot get rows and columns of an empty shape array.");return[r.length>1?r[r.length-2]:1,r[r.length-1]]}function ac(r){let e=[1,1,1];return r.length===0||r.length===1&&r[0]===1||(e=[Pa(r),...Oa(r)]),e}function JI(r,e=!1){let 
t=P().getNumber("WEBGL_MAX_TEXTURE_SIZE"),o=P().getNumber("WEBGL_MAX_SIZE_FOR_NARROW_TEXTURE");o===1/0&&P().getBool("WEBGL_AUTO_SQUARIFY_NARROW_TEXTURE_SHAPE")&&(o=t/2),e&&(t=t*2,o=o*2,r=r.map((i,p)=>p>=r.length-2?x.nearestLargerEven(r[p]):r[p]),r.length===1&&(r=[2,r[0]])),r.length!==2&&(r=x.squeezeShape(r).newShape);let n=x.sizeFromShape(r),s=null;r.length<=1&&n<=t?s=[1,n]:r.length===2&&r[0]<=t&&r[1]
bool isnan_custom(float val) {
uint floatToUint = floatBitsToUint(val);
return (floatToUint & 0x7fffffffu) > 0x7f800000u;
}
bvec4 isnan_custom(vec4 val) {
return bvec4(isnan_custom(val.x),
isnan_custom(val.y), isnan_custom(val.z), isnan_custom(val.w));
}
#define isnan(value) isnan_custom(value)
`:"",p="",u=`
#define round(value) newRound(value)
int newRound(float value) {
return int(floor(value + 0.5));
}
ivec4 newRound(vec4 value) {
return ivec4(floor(value + vec4(0.5)));
}
`):(r="",e="attribute",t="varying",o="varying",n="texture2D",s="gl_FragColor",a="",i=`
#define isnan(value) isnan_custom(value)
bool isnan_custom(float val) {
return (val > 0. || val < 1. || val == 0.) ? false : true;
}
bvec4 isnan_custom(vec4 val) {
return bvec4(isnan(val.x), isnan(val.y), isnan(val.z), isnan(val.w));
}
`,p=`
uniform float INFINITY;
bool isinf(float val) {
return abs(val) == INFINITY;
}
bvec4 isinf(vec4 val) {
return equal(abs(val), vec4(INFINITY));
}
`,u=`
int round(float value) {
return int(floor(value + 0.5));
}
ivec4 round(vec4 value) {
return ivec4(floor(value + vec4(0.5)));
}
`),{version:r,attribute:e,varyingVs:t,varyingFs:o,texture2D:n,output:s,defineOutput:a,defineSpecialNaN:i,defineSpecialInf:p,defineRound:u}}function is(r,e,t="index"){let o=x.computeStrides(e);return o.map((n,s)=>{let a=`int ${r[s]} = ${t} / ${n}`,i=s===o.length-1?`int ${r[s+1]} = ${t} - ${r[s]} * ${n}`:`index -= ${r[s]} * ${n}`;return`${a}; ${i};`}).join("")}function Ru(r,e,t="index"){let o=x.computeStrides(e);return o.map((n,s)=>{let a=`int ${r[s]} = ${t} / outShapeStrides[${s}]`,i=s===o.length-1?`int ${r[s+1]} = ${t} - ${r[s]} * outShapeStrides[${s}]`:`index -= ${r[s]} * outShapeStrides[${s}]`;return`${a}; ${i};`}).join("")}function x8(r,e){let t=r.length,o=r.map(s=>`${e}[${s}]`),n=new Array(t-1);n[t-2]=o[t-1];for(let s=t-3;s>=0;--s)n[s]=`(${n[s+1]} * ${o[s+1]})`;return n}function YE(r,e,t="index"){let o=r.map((s,a)=>a),n=x8(o,e);return n.map((s,a)=>{let i=`int ${r[a]} = ${t} / ${n[a]}`,p=a===n.length-1?`int ${r[a+1]} = ${t} - ${r[a]} * ${n[a]}`:`index -= ${r[a]} * ${n[a]}`;return`${i}; ${p};`}).join("")}function uc(r){let e=x.computeStrides(r).map(t=>t.toString());return`
int getFlatIndex(ivec3 coords) {
return coords.x * ${e[0]} + coords.y * ${e[1]} + coords.z;
}
`}/** Returns GLSL for `getFlatIndex(ivec3)` that reads the first two strides from the `outShapeStrides` uniform instead of baked-in constants. */function pc(){return`
int getFlatIndex(ivec3 coords) {
return coords.x * outShapeStrides[0] + coords.y * outShapeStrides[1] + coords.z;
}
`}/* GLSL source of `encode_float(float)`: packs a float into a vec4 of byte-scaled channels. NaN maps to all 255, magnitudes below FLOAT_MIN map to zero, values beyond +/-FLOAT_MAX map to fixed patterns; otherwise exponent and mantissa are split across the four channels and the result is divided by 255. */var Hd=`
const float FLOAT_MAX = 1.70141184e38;
const float FLOAT_MIN = 1.17549435e-38;
lowp vec4 encode_float(highp float v) {
if (isnan(v)) {
return vec4(255, 255, 255, 255);
}
highp float av = abs(v);
if(av < FLOAT_MIN) {
return vec4(0.0, 0.0, 0.0, 0.0);
} else if(v > FLOAT_MAX) {
return vec4(0.0, 0.0, 128.0, 127.0) / 255.0;
} else if(v < -FLOAT_MAX) {
return vec4(0.0, 0.0, 128.0, 255.0) / 255.0;
}
highp vec4 c = vec4(0,0,0,0);
highp float e = floor(log2(av));
highp float m = exp2(fract(log2(av))) - 1.0;
c[2] = floor(128.0 * m);
m -= c[2] / 128.0;
c[1] = floor(32768.0 * m);
m -= c[1] / 32768.0;
c[0] = floor(8388608.0 * m);
highp float ebias = e + 127.0;
c[3] = floor(ebias / 2.0);
ebias -= c[3] * 2.0;
c[2] += floor(ebias) * 128.0;
c[3] += 128.0 * step(0.0, -v);
return c / 255.0;
}
`;/* Local alias QE for I.getBroadcastDims. */var{getBroadcastDims:QE}=I;/** Assembles the full fragment shader source. `r` is the list of inputs ({name, shapeInfo}), `e` the output shape info ({logicalShape, texShape, isPacked}) and `t` the program ({enableShapeUniforms, packedInputs, customUniforms, userCode}). Emits uniform declarations for every input (float or float array for uniform inputs, sampler2D + offset otherwise; shape/texShape uniforms when enableShapeUniforms is set), for the output shape (ranks 1-4 only) and for custom uniforms; then joins, separated by newlines: prelude (v8, plus _8 for packed inputs), sampleTexture (I8), setOutput, the uniform declarations, getOutputCoords, the input samplers and the user code. */function ZE(r,e,t){let o=[];if(r.forEach(f=>{let d=x.sizeFromShape(f.shapeInfo.logicalShape);if(f.shapeInfo.isUniform?o.push(`uniform float ${f.name}${d>1?`[${d}]`:""};`):(o.push(`uniform sampler2D ${f.name};`),o.push(`uniform int offset${f.name};`)),t.enableShapeUniforms){let{uniformShape:h}=qd(t.packedInputs,f.shapeInfo.logicalShape,f.shapeInfo.texShape);switch(h.length){case 1:o.push(`uniform int ${f.name}Shape;`);break;case 2:o.push(`uniform ivec2 ${f.name}Shape;`);break;case 3:o.push(`uniform ivec3 ${f.name}Shape;`);break;case 4:o.push(`uniform ivec4 ${f.name}Shape;`);break;default:break}o.push(`uniform ivec2 ${f.name}TexShape;`)}}),t.enableShapeUniforms){switch(e.logicalShape.length){case 1:o.push("uniform int outShape;");break;case 2:o.push("uniform ivec2 outShape;"),o.push("uniform int outShapeStrides;");break;case 3:o.push("uniform ivec3 outShape;"),o.push("uniform ivec2 outShapeStrides;");break;case 4:o.push("uniform ivec4 outShape;"),o.push("uniform ivec3 outShapeStrides;");break;default:break}o.push("uniform ivec2 outTexShape;")}t.customUniforms&&t.customUniforms.forEach(f=>{o.push(`uniform ${f.type} ${f.name}${f.arrayIndex?`[${f.arrayIndex}]`:""};`)});let n=o.join(`
`),s=r.map(f=>y8(f,e,t.packedInputs,t.enableShapeUniforms)).join(`
`),a=e.texShape,i=Ct(),p=I8(i),u,c,l=v8(i);return e.isPacked?(u=b8(e.logicalShape,a,t.enableShapeUniforms),c=S8(i)):(u=C8(e.logicalShape,a,t.enableShapeUniforms),c=w8(i)),t.packedInputs&&(l+=_8),[l,p,c,n,u,s,t.userCode].join(`
`)}/** Picks the unpacked sampler generator for input `r` by logical rank (0-6); `e` (shape-uniform flag) is forwarded for ranks 0-4 only. Throws for higher ranks. */function lc(r,e=!1){let t=r.shapeInfo.logicalShape;switch(t.length){case 0:return V8(r,e);case 1:return W8(r,e);case 2:return G8(r,e);case 3:return q8(r,e);case 4:return j8(r,e);case 5:return X8(r);case 6:return Y8(r);default:throw new Error(`${t.length}-D input sampling is not yet supported`)}}/** Picks the packed sampler generator for input `r` by logical rank: 0, 1, 2, 3, and K8 for everything else. */function JE(r,e){switch(r.shapeInfo.logicalShape.length){case 0:return B8(r);case 1:return z8(r,e);case 2:return U8(r,e);case 3:return H8(r,e);default:return K8(r,e)}}/** Returns the sampler code for input `r` (packed when `t` is true) and, when the input rank does not exceed the output rank, additionally the output-coordinate sampler (Q8 packed, Z8 unpacked). */function y8(r,e,t=!1,o){let n="";t?n+=JE(r,o):n+=lc(r,o);let s=r.shapeInfo.logicalShape,a=e.logicalShape;return s.length<=a.length&&(t?n+=Q8(r,e):n+=Z8(r,e)),n}/** Picks the packed getOutputCoords generator by output rank `r.length`: 0, 1, 2, 3, and F8 for everything else. */function b8(r,e,t){switch(r.length){case 0:return e$();case 1:return E8(r,e,t);case 2:return M8(r,e,t);case 3:return R8(r,e,t);default:return F8(r,e,t)}}/** Picks the unpacked getOutputCoords generator by output rank (0-6); the shape-uniform flag `t` is forwarded for ranks 1-4 only. Throws for higher ranks. */function C8(r,e,t){switch(r.length){case 0:return e$();case 1:return $8(r,e,t);case 2:return L8(r,e,t);case 3:return A8(r,e,t);case 4:return D8(r,e,t);case 5:return P8(r,e);case 6:return O8(r,e);default:throw new Error(`${r.length}-D output sampling is not yet supported`)}}/** Returns GLSL for `sampleTexture(sampler2D, vec2)`, which samples with the function named by `r.texture2D` and returns the red channel. */function I8(r){return`
float sampleTexture(sampler2D textureSampler, vec2 uv) {
return ${r.texture2D}(textureSampler, uv).r;
}
`}/** Returns GLSL for `setOutput(float)`: writes vec4(val, 0, 0, 0) to the output variable named by `r.output`. */function w8(r){return`
void setOutput(float val) {
${r.output} = vec4(val, 0, 0, 0);
}
`}/** Returns GLSL for `setOutput(vec4)`: writes val unchanged to the output variable named by `r.output`. */function S8(r){return`
void setOutput(vec4 val) {
${r.output} = val;
}
`}/** Returns the fragment shader prelude for GLSL dialect description `r`: version line, precision qualifiers, the resultUV input, the output declaration, the halfCR constant, ivec5/ivec6 structs, the NAN uniform, the dialect's NaN/Inf/round definitions, integer helpers imod/idiv, a hash-based random(), and the UV helper snippets k8, T8 and N8. */function v8(r){return`${r.version}
precision highp float;
precision highp int;
precision highp sampler2D;
${r.varyingFs} vec2 resultUV;
${r.defineOutput}
const vec2 halfCR = vec2(0.5, 0.5);
struct ivec5
{
int x;
int y;
int z;
int w;
int u;
};
struct ivec6
{
int x;
int y;
int z;
int w;
int u;
int v;
};
uniform float NAN;
${r.defineSpecialNaN}
${r.defineSpecialInf}
${r.defineRound}
int imod(int x, int y) {
return x - y * (x / y);
}
int idiv(int a, int b, float sign) {
int res = a / b;
int mod = imod(a, b);
if (sign < 0. && mod != 0) {
res -= 1;
}
return res;
}
//Based on the work of Dave Hoskins
//https://www.shadertoy.com/view/4djSRW
#define HASHSCALE1 443.8975
float random(float seed){
vec2 p = resultUV * seed;
vec3 p3 = fract(vec3(p.xyx) * HASHSCALE1);
p3 += dot(p3, p3.yzx + 19.19);
return fract((p3.x + p3.y) * p3.z);
}
${k8}
${T8}
${N8}
`}/* k8: GLSL helpers uvFromFlat (flat index -> texel-centre UV) and packedUVfrom1D (same, with two logical elements per texel). */var k8=`
vec2 uvFromFlat(int texNumR, int texNumC, int index) {
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 packedUVfrom1D(int texNumR, int texNumC, int index) {
int texelIndex = index / 2;
int texR = texelIndex / texNumC;
int texC = texelIndex - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
`,/* T8: GLSL helper packedUVfrom2D - UV of the texel holding logical (row, col), each texel covering a 2x2 block. */T8=`
vec2 packedUVfrom2D(int texelsInLogicalRow, int texNumR,
int texNumC, int row, int col) {
int texelIndex = (row / 2) * texelsInLogicalRow + (col / 2);
int texR = texelIndex / texNumC;
int texC = texelIndex - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
`,/* N8: GLSL helper packedUVfrom3D - like packedUVfrom2D with an additional batch index b. */N8=`
vec2 packedUVfrom3D(int texNumR, int texNumC,
int texelsInBatch, int texelsInLogicalRow, int b,
int row, int col) {
int index = b * texelsInBatch + (row / 2) * texelsInLogicalRow + (col / 2);
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
`,/* _8: GLSL getChannel overloads that pick one channel of a texel from the parity of the inner coordinates (vec2) or of a single dimension (int). */_8=`
float getChannel(vec4 frag, vec2 innerDims) {
vec2 modCoord = mod(innerDims, 2.);
return modCoord.x == 0. ?
(modCoord.y == 0. ? frag.r : frag.g) :
(modCoord.y == 0. ? frag.b : frag.a);
}
float getChannel(vec4 frag, int dim) {
float modCoord = mod(float(dim), 2.);
return modCoord == 0. ? frag.r : frag.g;
}
`;/** Returns GLSL for the rank-0 `getOutputCoords()`, which always yields 0. */function e$(){return`
int getOutputCoords() {
return 0;
}
`}/** Returns GLSL for the packed rank-1 `getOutputCoords()`. `r` (logical shape) is unused, `e` is the texture shape and `t` selects reading the `outTexShape` uniform instead of baking constants. Which of the three variants is emitted (single packed row, single packed column, general) is always decided from `e` halved and rounded up. */function E8(r,e,t){let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)];return o[0]===1?t?`
int getOutputCoords() {
return 2 * int(resultUV.x * ceil(float(outTexShape[1]) / 2.0));
}
`:`
int getOutputCoords() {
return 2 * int(resultUV.x * ${o[1]}.0);
}
`:o[1]===1?t?`
int getOutputCoords() {
return 2 * int(resultUV.y * ceil(float(outTexShape[0]) / 2.0));
}
`:`
int getOutputCoords() {
return 2 * int(resultUV.y * ${o[0]}.0);
}
`:t?`
int getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
return 2 * (resTexRC.x * packedTexShape[1] + resTexRC.y);
}
`:`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
return 2 * (resTexRC.x * ${o[1]} + resTexRC.y);
}
`}/** Returns GLSL for the unpacked rank-1 `getOutputCoords()`. `r` (logical shape) is unused, `e` is the texture shape and `t` selects reading the `outTexShape` uniform instead of baking constants. The variant (single row, single column, general) is decided from `e`. */function $8(r,e,t){return e[0]===1?t?`
int getOutputCoords() {
return int(resultUV.x * float(outTexShape[1]));
}
`:`
int getOutputCoords() {
return int(resultUV.x * ${e[1]}.0);
}
`:e[1]===1?t?`
int getOutputCoords() {
return int(resultUV.y * float(outTexShape[0]));
}
`:`
int getOutputCoords() {
return int(resultUV.y * ${e[0]}.0);
}
`:t?`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
return resTexRC.x * outTexShape[1] + resTexRC.y;
}
`:`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
return resTexRC.x * ${e[1]} + resTexRC.y;
}
`}/** Returns GLSL for the packed rank-3 `getOutputCoords()` yielding ivec3(b, r, c). With `t` set, sizes come from the `outShape`/`outTexShape` uniforms; otherwise the halved texture shape `e` and the texel counts derived from logical shape `r` are baked in as constants. */function R8(r,e,t){if(t)return`
ivec3 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
int texelsInLogicalRow = int(ceil(float(outShape[2]) / 2.0));
int texelsInBatch = texelsInLogicalRow * int(ceil(float(outShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int b = index / texelsInBatch;
index -= b * texelsInBatch;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec3(b, r, c);
}
`;let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)],n=Math.ceil(r[2]/2),s=n*Math.ceil(r[1]/2);return`
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
int b = index / ${s};
index -= b * ${s};
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec3(b, r, c);
}
`}/** Returns GLSL for the unpacked rank-3 `getOutputCoords()` yielding ivec3(r, c, d). The flat index is unravelled with Ru (strides from the outShapeStrides uniform) when `t` is set, otherwise with `is` (strides of logical shape `r` baked in) and texture shape `e` as constants. */function A8(r,e,t){if(t)return`
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
${Ru(["r","c","d"],r)}
return ivec3(r, c, d);
}
`;let o=is(["r","c","d"],r);return`
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${o}
return ivec3(r, c, d);
}
`}/** Returns GLSL for the packed N-D `getOutputCoords()`. With `t` set, a fixed ivec4 variant driven by the `outShape`/`outTexShape` uniforms is emitted regardless of `r.length`. Otherwise the code is generated for the actual rank of `r`: one `b<k>` batch coordinate per leading dimension beyond the last three, followed by b, r, c, with all sizes baked in as constants. */function F8(r,e,t){if(t)return`
ivec4 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int texelsInLogicalRow = int(ceil(float(outShape[3]) / 2.0));
int texelsInBatch = texelsInLogicalRow * int(ceil(float(outShape[2]) / 2.0));
int texelsInBatchN = texelsInBatch * outShape[1];
int b2 = index / texelsInBatchN;
index -= b2 * texelsInBatchN;
int b = index / texelsInBatch;
index -= b * texelsInBatch;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec4(b2, b, r, c);
}
`;let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)],n=Math.ceil(r[r.length-1]/2),s=n*Math.ceil(r[r.length-2]/2),a=s,i="",p="b, r, c";for(let u=2;u<r.length-1;u++)a*=r[r.length-u-1],i=`
int b${u} = index / ${a};
index -= b${u} * ${a};
`+i,p=`b${u}, `+p;return`
ivec${r.length} getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
${i}
int b = index / ${s};
index -= b * ${s};
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec${r.length}(${p});
}
`}/** Returns GLSL for the unpacked rank-4 `getOutputCoords()` yielding ivec4(r, c, d, d2). The flat index is unravelled with Ru (uniform strides) when `t` is set, otherwise with `is` (strides of logical shape `r` baked in) and texture shape `e` as constants. */function D8(r,e,t){if(t)return`
ivec4 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
${Ru(["r","c","d","d2"],r)}
return ivec4(r, c, d, d2);
}
`;let o=is(["r","c","d","d2"],r);return`
ivec4 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${o}
return ivec4(r, c, d, d2);
}
`}/** Returns GLSL for the unpacked rank-5 `getOutputCoords()` yielding an ivec5 struct; strides of logical shape `r` and texture shape `e` are baked in as constants (no shape-uniform variant). */function P8(r,e){let t=is(["r","c","d","d2","d3"],r);return`
ivec5 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(${e[0]},
${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${t}
ivec5 outShape = ivec5(r, c, d, d2, d3);
return outShape;
}
`}/** Returns GLSL for the unpacked rank-6 `getOutputCoords()` yielding an ivec6 struct; strides of logical shape `r` and texture shape `e` are baked in as constants (no shape-uniform variant). */function O8(r,e){let t=is(["r","c","d","d2","d3","d4"],r);return`
ivec6 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${t}
ivec6 result = ivec6(r, c, d, d2, d3, d4);
return result;
}
`}/** Returns GLSL for the packed rank-2 `getOutputCoords()` yielding ivec2(r, c). When logical shape `r` equals texture shape `e` the coordinates are read directly from resultUV; otherwise the texel index is converted using the number of texels per logical row. `t` selects uniforms (`outShape`/`outTexShape`) over baked-in constants. */function M8(r,e,t){let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)];if(x.arraysEqual(r,e))return t?`
ivec2 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
return 2 * ivec2(resultUV.yx * vec2(packedTexShape[0], packedTexShape[1]));
}
`:`
ivec2 getOutputCoords() {
return 2 * ivec2(resultUV.yx * vec2(${o[0]}, ${o[1]}));
}
`;let n=Math.ceil(r[1]/2);return t?`
ivec2 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
int texelsInLogicalRow = int(ceil(float(outShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec2(r, c);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec2(r, c);
}
`}/** Returns GLSL for the unpacked rank-2 `getOutputCoords()` yielding ivec2. Four cases, each with a uniform (`t` set) and a constant variant: logical shape `r` equals texture shape `e` (direct read from resultUV), single logical column (index, 0), single logical row (0, index), and the general case dividing the flat index by the logical column count. */function L8(r,e,t){return x.arraysEqual(r,e)?t?`
ivec2 getOutputCoords() {
return ivec2(resultUV.yx * vec2(outTexShape[0], outTexShape[1]));
}
`:`
ivec2 getOutputCoords() {
return ivec2(resultUV.yx * vec2(${e[0]}, ${e[1]}));
}
`:r[1]===1?t?`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
return ivec2(index, 0);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
return ivec2(index, 0);
}
`:r[0]===1?t?`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
return ivec2(0, index);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
return ivec2(0, index);
}
`:t?`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
int r = index / outShape[1];
int c = index - r * outShape[1];
return ivec2(r, c);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
int r = index / ${r[1]};
int c = index - r * ${r[1]};
return ivec2(r, c);
}
`}/** Returns the name of the offset uniform for the input named `r`. */function Au(r){return`offset${r}`}/** Returns GLSL for the packed rank-0 sampler `get<Name>()`, which samples input `r` at the texel centre halfCR and returns the whole vec4. */function B8(r){let e=r.name,t="get"+e.charAt(0).toUpperCase()+e.slice(1),o=Ct();return`
vec4 ${t}() {
return ${o.texture2D}(${e}, halfCR);
}
`}/** Returns GLSL for the unpacked rank-0 sampler `get<Name>()`. Uniform inputs return the uniform directly; a 1x1 texture is sampled at halfCR; otherwise the UV is computed from the input's offset uniform, using the `<name>TexShape` uniform when `e` is set and the baked-in texture shape otherwise. */function V8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1);if(r.shapeInfo.isUniform)return`float ${o}() {return ${t};}`;let[n,s]=r.shapeInfo.texShape;if(n===1&&s===1)return`
float ${o}() {
return sampleTexture(${t}, halfCR);
}
`;let a=Au(t);if(e)return`
float ${o}() {
vec2 uv = uvFromFlat(${t}TexShape[0], ${t}TexShape[1], ${a});
return sampleTexture(${t}, uv);
}
`;let[i,p]=r.shapeInfo.texShape;return`
float ${o}() {
vec2 uv = uvFromFlat(${i}, ${p}, ${a});
return sampleTexture(${t}, uv);
}
`}function z8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),n=r.shapeInfo.texShape,s=Ct();if(e)return`
vec4 ${o}(int index) {
ivec2 packedTexShape = ivec2(ceil(float(${t}TexShape[0]) / 2.0), ceil(float(${t}TexShape[1]) / 2.0));
vec2 uv = packedUVfrom1D(
packedTexShape[0], packedTexShape[1], index);
return ${s.texture2D}(${t}, uv);
}
`;let a=[Math.ceil(n[0]/2),Math.ceil(n[1]/2)];return`
vec4 ${o}(int index) {
vec2 uv = packedUVfrom1D(
${a[0]}, ${a[1]}, index);
return ${s.texture2D}(${t}, uv);
}
`}function W8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1);if(r.shapeInfo.isUniform)return`
float ${o}(int index) {
${mc(r)}
}
`;let n=r.shapeInfo.texShape,s=n[0],a=n[1];if(a===1&&s===1)return`
float ${o}(int index) {
return sampleTexture(${t}, halfCR);
}
`;let i=Au(t);return a===1?e?`
float ${o}(int index) {
vec2 uv = vec2(0.5, (float(index + ${i}) + 0.5) / float(${t}TexShape[0]));
return sampleTexture(${t}, uv);
}
`:`
float ${o}(int index) {
vec2 uv = vec2(0.5, (float(index + ${i}) + 0.5) / ${s}.0);
return sampleTexture(${t}, uv);
}
`:s===1?e?`
float ${o}(int index) {
vec2 uv = vec2((float(index + ${i}) + 0.5) / float(${t}TexShape[1]), 0.5);
return sampleTexture(${t}, uv);
}
`:`
float ${o}(int index) {
vec2 uv = vec2((float(index + ${i}) + 0.5) / ${a}.0, 0.5);
return sampleTexture(${t}, uv);
}
`:e?`
float ${o}(int index) {
vec2 uv = uvFromFlat(${t}TexShape[0], ${t}TexShape[1], index + ${i});
return sampleTexture(${t}, uv);
}
`:`
float ${o}(int index) {
vec2 uv = uvFromFlat(${s}, ${a}, index + ${i});
return sampleTexture(${t}, uv);
}
`}function U8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape,a=s[0],i=s[1],p=Ct();if(s!=null&&x.arraysEqual(t,s))return e?`
vec4 ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return ${p.texture2D}(${o}, uv);
}
`:`
vec4 ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${i}.0, ${a}.0);
return ${p.texture2D}(${o}, uv);
}
`;if(e)return`
vec4 ${n}(int row, int col) {
ivec2 packedTexShape = ivec2(ceil(float(${o}TexShape[0]) / 2.0), ceil(float(${o}TexShape[1]) / 2.0));
int valuesPerRow = int(ceil(float(${o}Shape[1]) / 2.0));
vec2 uv = packedUVfrom2D(valuesPerRow, packedTexShape[0], packedTexShape[1], row, col);
return ${p.texture2D}(${o}, uv);
}
`;let u=[Math.ceil(s[0]/2),Math.ceil(s[1]/2)],c=Math.ceil(t[1]/2);return`
vec4 ${n}(int row, int col) {
vec2 uv = packedUVfrom2D(${c}, ${u[0]}, ${u[1]}, row, col);
return ${p.texture2D}(${o}, uv);
}
`}
/**
 * Emits a GLSL `float get<Name>(int row, int col)`.
 *
 * Branches, in order:
 *  1. texShape equals logicalShape: sample at (col, row) directly;
 *  2. x.squeezeShape() removed dimensions: wrap the output of lc() for a clone
 *     of `r` with the squeezed shape and forward the kept coordinates
 *     (presumably lc() emits the lower-rank sampler of the same name; confirm against lc);
 *  3. uniform input: compute a flat index and use the mc() snippet;
 *  4. texture with one dimension equal to 1, or the general uvFromFlat path.
 *
 * @param r Input descriptor; reads `r.name` and `r.shapeInfo`.
 * @param e When truthy shapes are read from `<name>Shape` / `<name>TexShape`
 *          in the shader instead of being inlined.
 * @returns GLSL source text.
 */
function G8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape;if(s!=null&&x.arraysEqual(t,s)){if(e)return`
float ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`;let m=s[0],f=s[1];return`
float ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${f}.0, ${m}.0);
return sampleTexture(${o}, uv);
}
`}
// Squeeze path: only taken when squeezeShape() returned fewer dimensions.
let{newShape:a,keptDims:i}=x.squeezeShape(t),p=a;if(p.length<t.length){let m=fc(r,p),f=["row","col"];return`
${lc(m,e)}
float ${n}(int row, int col) {
return ${n}(${dc(f,i)});
}
`}
// Uniform path: flat index = row * logicalShape[1] + col.
if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col) {
int index = round(dot(vec2(row, col), vec2(${t[1]}, 1)));
${mc(r)}
}
`;
// u / c are the two texShape entries; l is the `offset<name>` identifier.
let u=s[0],c=s[1],l=Au(o);return c===1?e?`
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${o}Shape[1], 1, 1));
vec2 uv = vec2(0.5, (index + 0.5) / float(${o}TexShape[0]));
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${t[1]}, 1, 1));
vec2 uv = vec2(0.5, (index + 0.5) / ${u}.0);
return sampleTexture(${o}, uv);
}
`:u===1?e?`
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${o}Shape[1], 1, 1));
vec2 uv = vec2((index + 0.5) / float(${o}TexShape[1]), 0.5);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${t[1]}, 1, 1));
vec2 uv = vec2((index + 0.5) / ${c}.0, 0.5);
return sampleTexture(${o}, uv);
}
`:e?`
float ${n}(int row, int col) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${o}Shape[1] + col + ${l};
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${t[1]} + col + ${l};
vec2 uv = uvFromFlat(${u}, ${c}, index);
return sampleTexture(${o}, uv);
}
`}
/**
 * Emits a GLSL `vec4 get<Name>(int b, int row, int col)`.
 *
 * When `logicalShape[0] === 1` the leading dimension is dropped: JE() is called
 * on a clone of `r` with the remaining shape and the wrapper forwards only
 * `row, col` (indices 1 and 2 of the coordinate names).
 * Otherwise the UV is resolved through `packedUVfrom3D`.
 *
 * @param r Input descriptor; reads `r.name`, `r.shapeInfo.logicalShape` and
 *          `r.shapeInfo.texShape`.
 * @param e When truthy shapes are read from `<name>Shape` / `<name>TexShape`
 *          in the shader instead of being inlined.
 * @returns GLSL source text.
 */
function H8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape,a=[Math.ceil(s[0]/2),Math.ceil(s[1]/2)];if(t[0]===1){let m=t.slice(1),f=[1,2],d=fc(r,m),h=["b","row","col"];return`
${JE(d,e)}
vec4 ${n}(int b, int row, int col) {
return ${n}(${dc(h,f)});
}
`}let i=Ct();if(e)return`
vec4 ${n}(int b, int row, int col) {
ivec2 packedTexShape = ivec2(ceil(float(${o}TexShape[0]) / 2.0), ceil(float(${o}TexShape[1]) / 2.0));
int valuesPerRow = int(ceil(float(${o}Shape[2]) / 2.0));
int texelsInBatch = valuesPerRow * int(ceil(float(${o}Shape[1]) / 2.0));
vec2 uv = packedUVfrom3D(
packedTexShape[0], packedTexShape[1], texelsInBatch, valuesPerRow, b, row, col);
return ${i.texture2D}(${o}, uv);
}
`;
// Inlined variant: p,u = halved texShape; c = ceil(shape[2]/2); l = c * ceil(shape[1]/2).
let p=a[0],u=a[1],c=Math.ceil(t[2]/2),l=c*Math.ceil(t[1]/2);return`
vec4 ${n}(int b, int row, int col) {
vec2 uv = packedUVfrom3D(
${p}, ${u}, ${l}, ${c}, b, row, col);
return ${i.texture2D}(${o}, uv);
}
`}
/**
 * Emits a GLSL `float get<Name>(int row, int col, int depth)`.
 *
 * `s = shape[1] * shape[2]` and `a = shape[2]` are the strides of `row` and `col`.
 * Branches, in order:
 *  1. squeezeShape() removed dimensions: wrap the output of lc() for the
 *     squeezed clone and forward the kept coordinates;
 *  2. uniform input: flat index plus the mc() snippet;
 *  3. texture width equals `s` with no flatOffset: row maps to the texture row;
 *  4. texture width equals `a` with no flatOffset: depth maps to the texture column;
 *  5. otherwise a flat index (plus `offset<name>`) resolved by uvFromFlat.
 *
 * @param r Input descriptor; reads `r.name` and `r.shapeInfo`.
 * @param e When truthy shapes are read from `<name>Shape` / `<name>TexShape`
 *          in the shader instead of being inlined.
 * @returns GLSL source text.
 */
function q8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=t[1]*t[2],a=t[2],{newShape:i,keptDims:p}=x.squeezeShape(t),u=i;if(u.length<t.length){let h=fc(r,u),g=["row","col","depth"];return`
${lc(h,e)}
float ${n}(int row, int col, int depth) {
return ${n}(${dc(g,p)});
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col, int depth) {
int index = round(dot(vec3(row, col, depth),
vec3(${s}, ${a}, 1)));
${mc(r)}
}
`;
// l / m are the two texShape entries; f is the optional flat offset.
let c=r.shapeInfo.texShape,l=c[0],m=c[1],f=r.shapeInfo.flatOffset;if(m===s&&f==null)return e?`
float ${n}(int row, int col, int depth) {
int stride1 = ${o}Shape[2];
float texR = float(row);
float texC = dot(vec2(col, depth), vec2(stride1, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
float texR = float(row);
float texC = dot(vec2(col, depth), vec2(${a}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${m}.0, ${l}.0);
return sampleTexture(${o}, uv);
}
`;if(m===a&&f==null)return e?`
float ${n}(int row, int col, int depth) {
float texR = dot(vec2(row, col), vec2(${o}Shape[1], 1));
float texC = float(depth);
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
float texR = dot(vec2(row, col), vec2(${t[1]}, 1));
float texC = float(depth);
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${m}.0, ${l}.0);
return sampleTexture(${o}, uv);
}
`;let d=Au(o);return e?`
float ${n}(int row, int col, int depth) {
// Explicitly use integer operations as dot() only works on floats.
int stride0 = ${o}Shape[1] * ${o}Shape[2];
int stride1 = ${o}Shape[2];
int index = row * stride0 + col * stride1 + depth + ${d};
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${s} + col * ${a} + depth + ${d};
vec2 uv = uvFromFlat(${l}, ${m}, index);
return sampleTexture(${o}, uv);
}
`}
/**
 * Emits a GLSL `vec4 get<Name>(...)` that turns batch/row/col coordinates into
 * a texel index and then into a UV.
 *
 * With `e` truthy the emitted function has the fixed signature
 * `(int b2, int b, int row, int col)` and reads `<name>Shape` /
 * `<name>TexShape` in the shader.
 * Otherwise the signature starts as `(int b, int row, int col)` and the loop
 * prepends one `int b<h>` parameter (and one index term) for every
 * h in [2, rank - 2], with all sizes inlined.
 *
 * @param r Input descriptor; reads `r.name`, `r.shapeInfo.logicalShape` and
 *          `r.shapeInfo.texShape`.
 * @param e Selects the uniform-reading variant when truthy.
 * @returns GLSL source text.
 */
function K8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),n=Ct();if(e)return`
vec4 ${o}(int b2, int b, int row, int col) {
int valuesPerRow = int(ceil(float(${t}Shape[3]) / 2.0));
int texelsInBatch = valuesPerRow * int(ceil(float(${t}Shape[2]) / 2.0));
int index = b * texelsInBatch + (row / 2) * valuesPerRow + (col / 2);
texelsInBatch *= ${t}Shape[1];
index = b2 * texelsInBatch + index;
ivec2 packedTexShape = ivec2(ceil(float(${t}TexShape[0]) / 2.0), ceil(float(${t}TexShape[1]) / 2.0));
int texR = index / packedTexShape[1];
int texC = index - texR * packedTexShape[1];
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(packedTexShape[1], packedTexShape[0]); return ${n.texture2D}(${t}, uv);
}
`;
// l = ceil(lastDim / 2), m = l * ceil(secondLastDim / 2); m is multiplied by
// each further leading dimension as the loop walks outwards.
let s=r.shapeInfo.logicalShape,a=s.length,i=r.shapeInfo.texShape,p=[Math.ceil(i[0]/2),Math.ceil(i[1]/2)],u=p[0],c=p[1],l=Math.ceil(s[a-1]/2),m=l*Math.ceil(s[a-2]/2),f="int b, int row, int col",d=`b * ${m} + (row / 2) * ${l} + (col / 2)`;for(let h=2;h<a-1;h++)f=`int b${h}, `+f,m*=s[a-h-1],d=`b${h} * ${m} + `+d;return`
vec4 ${o}(${f}) {
int index = ${d};
int texR = index / ${c};
int texC = index - texR * ${c};
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${c}, ${u});
return ${n.texture2D}(${t}, uv);
}
`}
/**
 * Emits a GLSL `float get<Name>(int row, int col, int depth, int depth2)`.
 *
 * `s`, `a`, `i` are the strides of `depth`, `col` and `row`
 * (shape[3], shape[2]*shape[3], shape[1]*shape[2]*shape[3]).
 * Branches, in order:
 *  1. squeezeShape() removed dimensions: wrap the output of lc() for the
 *     squeezed clone and forward the kept coordinates;
 *  2. uniform input: flat index plus the mc() snippet;
 *  3. texture width equals `i` with no flatOffset: row maps to the texture row;
 *  4. texture width equals `s` with no flatOffset: depth2 maps to the texture column;
 *  5. otherwise a flat index (plus `offset<name>`) resolved by uvFromFlat.
 *
 * @param r Input descriptor; reads `r.name` and `r.shapeInfo`.
 * @param e When truthy shapes are read from `<name>Shape` / `<name>TexShape`
 *          in the shader instead of being inlined.
 * @returns GLSL source text.
 */
function j8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=t[3],a=t[2]*s,i=t[1]*a,{newShape:p,keptDims:u}=x.squeezeShape(t);if(p.length<t.length){let b=fc(r,p),C=["row","col","depth","depth2"];return`
${lc(b,e)}
float ${n}(int row, int col, int depth, int depth2) {
return ${n}(${dc(C,u)});
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col, int depth, int depth2) {
int index = round(dot(vec4(row, col, depth, depth2),
vec4(${i}, ${a}, ${s}, 1)));
${mc(r)}
}
`;
// d / h / g are GLSL declarations of stride2 / stride1 / stride0 built from
// `<name>Shape`; they are only interpolated into the `e`-truthy variants.
let c=r.shapeInfo.flatOffset,l=r.shapeInfo.texShape,m=l[0],f=l[1],d=`int stride2 = ${o}Shape[3];`,h=`int stride1 = ${o}Shape[2] * stride2;`,g=`int stride0 = ${o}Shape[1] * stride1;`;if(f===i&&c==null)return e?`
float ${n}(int row, int col, int depth, int depth2) {
${d}
${h}
float texR = float(row);
float texC =
dot(vec3(col, depth, depth2),
vec3(stride1, stride2, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
float texR = float(row);
float texC =
dot(vec3(col, depth, depth2),
vec3(${a}, ${s}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${f}.0, ${m}.0);
return sampleTexture(${o}, uv);
}
`;if(f===s&&c==null)return e?`
float ${n}(int row, int col, int depth, int depth2) {
float texR = dot(vec3(row, col, depth),
vec3(${o}Shape[1] * ${o}Shape[2], ${o}Shape[2], 1));
float texC = float(depth2);
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
float texR = dot(vec3(row, col, depth),
vec3(${t[1]*t[2]}, ${t[2]}, 1));
float texC = float(depth2);
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${f}.0, ${m}.0);
return sampleTexture(${o}, uv);
}
`;let y=Au(o);return e?`
float ${n}(int row, int col, int depth, int depth2) {
// Explicitly use integer operations as dot() only works on floats.
${d}
${h}
${g}
int index = row * stride0 + col * stride1 +
depth * stride2 + depth2;
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index + ${y});
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${i} + col * ${a} +
depth * ${s} + depth2;
vec2 uv = uvFromFlat(${m}, ${f}, index + ${y});
return sampleTexture(${o}, uv);
}
`}function X8(r){let e=r.shapeInfo.logicalShape,t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),n=e[4],s=e[3]*n,a=e[2]*s,i=e[1]*a,{newShape:p,keptDims:u}=x.squeezeShape(e);if(p.length<e.length){let h=fc(r,p),g=["row","col","depth","depth2","depth3"];return`
${lc(h)}
float ${o}(int row, int col, int depth, int depth2, int depth3) {
return ${o}(${dc(g,u)});
}
`}if(r.shapeInfo.isUniform)return`
float ${o}(int row, int col, int depth, int depth2, int depth3) {
float index = dot(
vec4(row, col, depth, depth2),
vec4(${i}, ${a}, ${s}, ${n})) +
depth3;
${mc(r)}
}
`;let c=r.shapeInfo.flatOffset,l=r.shapeInfo.texShape,m=l[0],f=l[1];if(f===i&&c==null)return`
float ${o}(int row, int col, int depth, int depth2, int depth3) {
int texR = row;
float texC = dot(vec4(col, depth, depth2, depth3),
vec4(${a}, ${s}, ${n}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${f}.0, ${m}.0);
return sampleTexture(${t}, uv);
}
`;if(f===n&&c==null)return`
float ${o}(int row, int col, int depth, int depth2, int depth3) {
float texR = dot(
vec4(row, col, depth, depth2),
vec4(${e[1]*e[2]*e[3]},
${e[2]*e[3]}, ${e[3]}, 1));
int texC = depth3;
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${f}.0, ${m}.0);
return sampleTexture(${t}, uv);
}
`;let d=Au(t);return`
float ${o}(int row, int col, int depth, int depth2, int depth3) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${i} + col * ${a} + depth * ${s} +
depth2 * ${n} + depth3 + ${d};
vec2 uv = uvFromFlat(${m}, ${f}, index);
return sampleTexture(${t}, uv);
}
`}
/**
 * Emits a GLSL `float get<Name>(int row, int col, int depth, int depth2,
 * int depth3, int depth4)`.
 *
 * Branches, in order:
 *  1. squeezeShape() removed dimensions: wrap the output of lc() for the
 *     squeezed clone and forward the kept coordinates;
 *  2. uniform input: flat index plus the mc() snippet;
 *  3. texture width equals the row stride with no flatOffset: row maps to the texture row;
 *  4. texture width equals shape[5] with no flatOffset: depth4 maps to the texture column;
 *  5. otherwise a flat index (plus `offset<name>`) resolved by uvFromFlat.
 *
 * @param r Input descriptor; reads `r.name` and `r.shapeInfo`.
 * @returns GLSL source text.
 */
function Y8(r){let e=r.shapeInfo.logicalShape,t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),{newShape:n,keptDims:s}=x.squeezeShape(e);if(n.length<e.length){let g=fc(r,n),y=["row","col","depth","depth2","depth3","depth4"];return`
${lc(g)}
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
return ${o}(${dc(y,s)});
}
`}
// Strides of depth3 (a), depth2 (i), depth (p), col (u) and row (c).
let a=e[5],i=e[4]*a,p=e[3]*i,u=e[2]*p,c=e[1]*u;if(r.shapeInfo.isUniform)return`
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
int index = round(dot(
vec4(row, col, depth, depth2),
vec4(${c}, ${u}, ${p}, ${i})) +
dot(
vec2(depth3, depth4),
vec2(${a}, 1)));
${mc(r)}
}
`;let l=r.shapeInfo.flatOffset,m=r.shapeInfo.texShape,f=m[0],d=m[1];if(d===c&&l==null)return`
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
int texR = row;
float texC = dot(vec4(col, depth, depth2, depth3),
vec4(${u}, ${p}, ${i}, ${a})) +
float(depth4);
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${d}.0, ${f}.0);
return sampleTexture(${t}, uv);
}
`;if(d===a&&l==null)return`
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
float texR = dot(vec4(row, col, depth, depth2),
vec4(${e[1]*e[2]*e[3]*e[4]},
${e[2]*e[3]*e[4]},
${e[3]*e[4]},
${e[4]})) + float(depth3);
int texC = depth4;
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${d}.0, ${f}.0);
return sampleTexture(${t}, uv);
}
`;let h=Au(t);return`
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${c} + col * ${u} + depth * ${p} +
depth2 * ${i} + depth3 * ${a} + depth4 + ${h};
vec2 uv = uvFromFlat(${f}, ${d}, index);
return sampleTexture(${t}, uv);
}
`}function mc(r){let e=r.name,t=x.sizeFromShape(r.shapeInfo.logicalShape);return t<2?`return ${e};`:`
for (int i = 0; i < ${t}; i++) {
if (i == index) {
return ${e}[i];
}
}
`}
/**
 * Emits a GLSL `vec4 get<Name>AtOutCoords()` that calls `get<Name>(...)` with
 * coordinates derived from `getOutputCoords()`.
 *
 * `i` (the result of QE on the two logical shapes) lists input dimensions whose
 * coordinate is forced to 0 before sampling. The final `return` statement `f`
 * replicates components of the fetched value depending on which of the two
 * innermost input dimensions appear in `i` and on whether input or output
 * hold exactly one element.
 *
 * @param r Input descriptor; reads `r.name` and `r.shapeInfo.logicalShape`.
 * @param e Output shape info; reads `e.logicalShape`.
 * @returns GLSL source text.
 */
function Q8(r,e){let t=r.name,o=t.charAt(0).toUpperCase()+t.slice(1),n="get"+o+"AtOutCoords",s=r.shapeInfo.logicalShape.length,a=e.logicalShape.length,i=QE(r.shapeInfo.logicalShape,e.logicalShape),p=_e(a),u=a-s,c,l=["x","y","z","w","u","v"];
// c: statements that zero coordinates; u shifts input dims onto output coordinate fields.
s===0?c="":a<2&&i.length>=1?c="coords = 0;":c=i.map(b=>`coords.${l[b+u]} = 0;`).join(`
`);let m="";a<2&&s>0?m="coords":m=r.shapeInfo.logicalShape.map((b,C)=>`coords.${l[C+u]}`).join(", ");let f="return outputValue;",h=x.sizeFromShape(r.shapeInfo.logicalShape)===1,y=x.sizeFromShape(e.logicalShape)===1;if(s===1&&!h&&!y)f=`
return vec4(outputValue.xy, outputValue.xy);
`;else if(h&&!y)a===1?f=`
return vec4(outputValue.x, outputValue.x, 0., 0.);
`:f=`
return vec4(outputValue.x);
`;else if(i.length){let b=s-2,C=s-1;i.indexOf(b)>-1&&i.indexOf(C)>-1?f="return vec4(outputValue.x);":i.indexOf(b)>-1?f="return vec4(outputValue.x, outputValue.y, outputValue.x, outputValue.y);":i.indexOf(C)>-1&&(f="return vec4(outputValue.xx, outputValue.zz);")}return`
vec4 ${n}() {
${p} coords = getOutputCoords();
${c}
vec4 outputValue = get${o}(${m});
${f}
}
`}function Z8(r,e){let t=r.name,o=t.charAt(0).toUpperCase()+t.slice(1),n="get"+o+"AtOutCoords",s=e.texShape,a=r.shapeInfo.texShape,i=r.shapeInfo.logicalShape.length,p=e.logicalShape.length;if(!r.shapeInfo.isUniform&&i===p&&r.shapeInfo.flatOffset==null&&x.arraysEqual(a,s))return`
float ${n}() {
return sampleTexture(${t}, resultUV);
}
`;let u=_e(p),c=QE(r.shapeInfo.logicalShape,e.logicalShape),l=p-i,m,f=["x","y","z","w","u","v"];i===0?m="":p<2&&c.length>=1?m="coords = 0;":m=c.map(h=>`coords.${f[h+l]} = 0;`).join(`
`);let d="";return p<2&&i>0?d="coords":d=r.shapeInfo.logicalShape.map((h,g)=>`coords.${f[g+l]}`).join(", "),`
float ${n}() {
${u} coords = getOutputCoords();
${m}
return get${o}(${d});
}
`}
/**
 * Maps a rank to the GLSL coordinate type name: "int" for rank <= 1, then
 * "ivec2" ... "ivec6". Throws for any other rank.
 */
function _e(r){if(r<=1)return"int";if(r===2)return"ivec2";if(r===3)return"ivec3";if(r===4)return"ivec4";if(r===5)return"ivec5";if(r===6)return"ivec6";throw Error(`GPU for rank ${r} is not yet supported`)}
/**
 * Decides which shape is uploaded as the `<name>Shape` uniform.
 * `r` is a packed-inputs flag, `e` the logical shape and `t` the texture shape.
 * Returns `{useSqueezeShape, uniformShape, keptDims}`; `uniformShape` is the
 * squeezed shape (or `e.slice(1)` for a packed rank-3 shape with leading 1)
 * when `useSqueezeShape` is true, else `e` itself.
 */
function qd(r,e,t){let{newShape:o,keptDims:n}=x.squeezeShape(e),s=e.length,a=r&&s===3&&e[0]===1,i=a?e.slice(1):o,p=!r&&s>1&&!x.arraysEqual(e,t)&&o.length<s||a;return{useSqueezeShape:p,uniformShape:p?i:e,keptDims:n}}
/** Returns a JSON round-trip copy of `r` whose `shapeInfo.logicalShape` is replaced by `e`. */
function fc(r,e){let t=JSON.parse(JSON.stringify(r));return t.shapeInfo.logicalShape=e,t}
/** Picks the entries of `r` at the indices listed in `e` and joins them with ", ". */
function dc(r,e){return e.map(t=>r[t]).join(", ")}
/**
 * Builds the shader source for program `e`, compiles it and returns the program record.
 * `r` is the GPGPU context, `t` the inputs and `o` the output.
 * Each input yields a `{name, shapeInfo}` entry; `flatOffset` is only set when
 * the input's `texData.slice.flatOffset` is > 0.
 * When the ENGINE_COMPILE_ONLY flag is set, all uniform-location fields are
 * null; otherwise they are filled in by aw().
 */
function r$(r,e,t,o){let n=t.map((c,l)=>{let m={logicalShape:c.shape,texShape:c.isUniform?null:c.texData.texShape,isUniform:c.isUniform,isPacked:c.isUniform?!1:c.texData.isPacked,flatOffset:null};return c.texData!=null&&c.texData.slice!=null&&c.texData.slice.flatOffset>0&&(m.flatOffset=c.texData.slice.flatOffset),{name:e.variableNames[l],shapeInfo:m}}),s=n.map(c=>c.shapeInfo),a={logicalShape:o.shape,texShape:o.texData.texShape,isUniform:!1,isPacked:o.texData.isPacked,flatOffset:null},i=ZE(n,a,e),p=WI(r.gl,i),u=r.createProgram(p);return P().get("ENGINE_COMPILE_ONLY")?{program:e,fragmentShader:p,source:i,webGLProgram:u,inShapeInfos:s,outShapeInfo:a,uniformLocations:null,customUniformLocations:null,infLoc:null,nanLoc:null,inShapesLocations:null,inTexShapesLocations:null,outShapeLocation:null,outShapeStridesLocation:null,outTexShapeLocation:null}:Object.assign({program:e,fragmentShader:p,source:i,webGLProgram:u,inShapeInfos:s,outShapeInfo:a},aw(r,e,u))}
/**
 * Looks up every uniform location of the linked program `t` for program `e`:
 * "NAN", "INFINITY" (only when WEBGL_VERSION is 1), each variable and its
 * `offset<name>`, the per-input and output shape uniforms (only when
 * `e.enableShapeUniforms`), and `e.customUniforms` by name.
 * All lookups pass `false` as the third getUniformLocation argument.
 */
function aw(r,e,t){let o={},n={},s={},a=[],i,p,u,c=null,l=null;l=r.getUniformLocation(t,"NAN",!1),P().getNumber("WEBGL_VERSION")===1&&(c=r.getUniformLocation(t,"INFINITY",!1));let m=!1;for(let f=0;f<e.variableNames.length;f++){let d=e.variableNames[f];o[d]=r.getUniformLocation(t,d,m),o[`offset${d}`]=r.getUniformLocation(t,`offset${d}`,m),e.enableShapeUniforms&&(n[`${d}Shape`]=r.getUniformLocation(t,`${d}Shape`,m),s[`${d}TexShape`]=r.getUniformLocation(t,`${d}TexShape`,m))}return e.enableShapeUniforms&&(i=r.getUniformLocation(t,"outShape",m),u=r.getUniformLocation(t,"outShapeStrides",m),p=r.getUniformLocation(t,"outTexShape",m)),e.customUniforms&&e.customUniforms.forEach((f,d)=>{a[d]=r.getUniformLocation(t,f.name,m)}),{uniformLocations:o,customUniformLocations:a,infLoc:c,nanLoc:l,inShapesLocations:n,inTexShapesLocations:s,outShapeLocation:i,outShapeStridesLocation:u,outTexShapeLocation:p}}
/**
 * Verifies that the shape infos `r` recorded at compile time match the
 * arguments `e` supplied at execution time: same count, same logical shape
 * and, unless both sides are uniforms, same texture shape. Throws on mismatch.
 */
function t$(r,e){if(r.length!==e.length)throw Error(`Binary was compiled with ${r.length} inputs, but was executed with ${e.length} inputs`);r.forEach((t,o)=>{let n=t.logicalShape,s=e[o],a=s.shape;if(!x.arraysEqual(n,a))throw Error(`Binary was compiled with different shapes than the current args. Shapes ${n} and ${a} must match`);if(t.isUniform&&s.isUniform)return;let i=t.texShape,p=s.isUniform?null:s.texData.texShape;if(!x.arraysEqual(i,p))throw Error(`Binary was compiled with different texture shapes than the current args. 
Shape ${i} and ${p} must match`)})}function o$(r,e,t,o,n){e.program.enableShapeUniforms||(t$(e.inShapeInfos,t),t$([e.outShapeInfo],[o]));let s=o.texData.texture,a=o.texData.texShape;o.texData.isPacked?r.setOutputPackedMatrixTexture(s.texture,a[0],a[1]):r.setOutputMatrixTexture(s.texture,a[0],a[1]),r.setProgram(e.webGLProgram),P().getNumber("WEBGL_VERSION")===1&&e.infLoc!==null&&r.gl.uniform1f(e.infLoc,1/0),e.nanLoc!==null&&r.gl.uniform1f(e.nanLoc,NaN),t.forEach((p,u)=>{let c=e.program.variableNames[u],l=e.uniformLocations[c],m=e.uniformLocations[`offset${c}`],f=e.inShapesLocations[`${c}Shape`],d=e.inTexShapesLocations[`${c}TexShape`];if(f){let{uniformShape:h}=qd(e.program.packedInputs,p.shape,p.texData.texShape);switch(h.length){case 1:r.gl.uniform1iv(f,new Int32Array(h));break;case 2:r.gl.uniform2iv(f,new Int32Array(h));break;case 3:r.gl.uniform3iv(f,new Int32Array(h));break;case 4:r.gl.uniform4iv(f,new Int32Array(h));break;default:break}}if(d&&r.gl.uniform2i(d,p.texData.texShape[0],p.texData.texShape[1]),l!=null){if(p.isUniform){if(x.sizeFromShape(p.shape)<2)r.gl.uniform1f(l,p.uniformValues[0]);else{let h=p.uniformValu
ivec3 outCoordsFromFlatIndex(int index) {
${this.enableShapeUniforms?Ru(["r","c","d"],e):is(["r","c","d"],e)}
return ivec3(r, c, d);
}
void main() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(texShape[0], texShape[1]));
int index = 4 * (resTexRC.x * texShape[1] + resTexRC.y);
vec4 result = vec4(0.);
for (int i=0; i<4; i++) {
int flatIndex = index + i;
ivec3 rc = outCoordsFromFlatIndex(flatIndex);
result[i] = getA(rc.x, rc.y, rc.z);
}
${t.output} = result;
}
`}};var jd=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outPackingScheme=ki.DENSE,this.customUniforms=[{name:"texShape",type:"ivec2"}];let t=Ct();this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
ivec3 outCoordsFromFlatIndex(int index) {
${this.enableShapeUniforms?Ru(["r","c","d"],e):is(["r","c","d"],e)}
return ivec3(r, c, d);
}
void main() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(texShape[0], texShape[1]));
int index = 4 * (resTexRC.x * texShape[1] + resTexRC.y);
vec4 result = vec4(0.);
for (int i=0; i<4; i++) {
int flatIndex = index + i;
ivec3 rc = outCoordsFromFlatIndex(flatIndex);
result[i] = getChannel(getA(rc.x, rc.y, rc.z), vec2(rc.y, rc.z));
}
${t.output} = result;
}
`}};var Xd=class{constructor(e){this.variableNames=["A"],this.outTexUsage=ir.DOWNLOAD;let t=Ct();this.outputShape=e,this.userCode=`
${Hd}
void main() {
float x = getAAtOutCoords();
${t.output} = encode_float(x);
}
`}};var Yd=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!1,this.outTexUsage=ir.DOWNLOAD;let t=Ct();this.outputShape=e,this.userCode=`
${Hd}
void main() {
ivec3 coords = getOutputCoords();
float x = getChannel(getAAtOutCoords(), vec2(coords.y, coords.z));
${t.output} = encode_float(x);
}
`}};var tY={R:0,G:1,B:2,A:3},Al=class{constructor(e,t=!1,o="RGBA"){this.variableNames=["A"],this.customUniforms=[{name:"texShape",type:"ivec2"}];let n=Ct();this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let s="result";t&&(s="floor(result * 255. + 0.5)");let a="";for(let i=0;i<o.length;i++){let p=o[i];a+=`
if(offset == ${i}) {
result = values[${tY[p]}];
}`}this.userCode=`
${this.enableShapeUniforms?pc():uc(e)}
void main() {
ivec3 coords = getOutputCoords();
int flatIndex = getFlatIndex(coords);
float result = 0.;
int offset = imod(flatIndex, ${o.length});
flatIndex = idiv(flatIndex, ${o.length}, 1.);
int r = flatIndex / texShape[1];
if (r < texShape[0]) {
int c = imod(flatIndex, texShape[1]);
vec2 uv = (vec2(c, r) + halfCR) / vec2(texShape[1], texShape[0]);
vec4 values = ${n.texture2D}(A, uv);
${a}
}
${n.output} = vec4(${s}, 0., 0., 0.);
}
`}};
/**
 * Shader program with one unpacked input "A" and a packed output.
 * The two nested 0..1 loops generate four copies of a GLSL snippet, one per
 * output channel `p = a * 2 + i`. Each copy offsets coords[1] by `a` and
 * coords[2] by `i`, skips positions outside the output shape, and copies the
 * channel selected by `flatIndex mod 4` from the fetched texel into result[p].
 * Takes the custom uniform `texShape` (ivec2).
 *
 * constructor(e, t = false): `e` is the output shape; when `t` is truthy the
 * result is written as `floor(result * 255. + 0.5)`.
 */
var Qd=class{constructor(e,t=!1){this.variableNames=["A"],this.packedInputs=!1,this.packedOutput=!0,this.customUniforms=[{name:"texShape",type:"ivec2"}];let o=Ct();this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let n="",s="result";t&&(s="floor(result * 255. + 0.5)");for(let a=0;a<=1;a++)for(let i=0;i<=1;i++){let p=a*2+i;n+=`
localCoords = coords;
if(localCoords[2] + ${i} < ${this.enableShapeUniforms?"outShape[2]":`${e[2]}`}) {
localCoords[2] += ${i};
if (localCoords[1] + ${a} < ${this.enableShapeUniforms?"outShape[1]":`${e[1]}`}) {
localCoords[1] += ${a};
flatIndex = getFlatIndex(localCoords);
offset = imod(flatIndex, 4);
flatIndex = idiv(flatIndex, 4, 1.);
int r = flatIndex / texShape[1];
int c = imod(flatIndex, texShape[1]);
vec2 uv = (vec2(c, r) + halfCR) / vec2(texShape[1], texShape[0]);
values = ${o.texture2D}(A, uv);
if (offset == 0) {
result[${p}] = values[0];
} else if (offset == 1) {
result[${p}] = values[1];
} else if (offset == 2) {
result[${p}] = values[2];
} else {
result[${p}] = values[3];
}
}
}
`}this.userCode=`
${this.enableShapeUniforms?pc():uc(e)}
void main() {
ivec3 coords = getOutputCoords();
vec4 result = vec4(0.);
int flatIndex, r, c, offset;
ivec3 localCoords;
vec2 uv;
vec4 values;
${n}
${o.output} = ${s};
}
`}};
/**
 * Export namespace for the texture/buffer helpers defined below. Be() installs
 * one enumerable getter per key, so each public name resolves lazily to the
 * minified function on the right-hand side.
 */
var Sw={};Be(Sw,{bindVertexProgramAttributeStreams:()=>hw,createBufferFromOutputTexture:()=>yw,createFloat16MatrixTexture:()=>lw,createFloat16PackedMatrixTexture:()=>dw,createFloat32MatrixTexture:()=>cw,createIndexBuffer:()=>pw,createPackedMatrixTexture:()=>fw,createUnsignedBytesMatrixTexture:()=>mw,createVertexBuffer:()=>uw,createVertexShader:()=>iw,downloadByteEncodedFloatMatrixFromOutputTexture:()=>Cw,downloadFloat32MatrixFromBuffer:()=>bw,downloadMatrixFromPackedOutputTexture:()=>ww,downloadPackedMatrixFromBuffer:()=>Iw,getInternalFormatForFloat16MatrixTexture:()=>Jd,getInternalFormatForFloat16PackedMatrixTexture:()=>rh,getInternalFormatForFloat32MatrixTexture:()=>Zd,getInternalFormatForPackedMatrixTexture:()=>th,getInternalFormatForUnsignedBytesMatrixTexture:()=>eh,uploadDenseMatrixToTexture:()=>gw,uploadPixelDataToTexture:()=>xw});
/**
 * createVertexShader: builds the vertex shader source and passes it, with the
 * GL context `r`, to zI(). The shader forwards `clipSpacePos` to gl_Position
 * and copies the `uv` attribute into the varying `resultUV`.
 */
function iw(r){let e=Ct(),t=`${e.version}
precision highp float;
${e.attribute} vec3 clipSpacePos;
${e.attribute} vec2 uv;
${e.varyingVs} vec2 resultUV;
void main() {
gl_Position = vec4(clipSpacePos, 1);
resultUV = uv;
}`;return zI(r,t)}
/**
 * createVertexBuffer: uploads 20 floats through HI(), i.e. four vertices of
 * five floats each (presumably x, y, z, u, v, matching the 3 + 2 attribute
 * layout bound in hw; confirm against HI/Wd).
 */
function uw(r){let e=new Float32Array([-1,1,0,0,1,-1,-1,0,0,0,1,1,0,1,1,1,-1,0,1,0]);return HI(r,e)}
/** createIndexBuffer: uploads the six indices 0,1,2,2,1,3 through qI(). */
function pw(r){let e=new Uint16Array([0,1,2,2,1,3]);return qI(r,e)}
/**
 * Shared texture factory. Checks the size with jI(e, t), creates a texture,
 * sets CLAMP_TO_EDGE wrapping and NEAREST filtering, then allocates storage:
 * texImage2D with null data on WEBGL_VERSION 1, texStorage2D otherwise.
 * `e` is the width and `t` the height passed to GL; `o` is the internal format
 * and `n` / `s` the format and type used by the texImage2D path.
 * Returns `{texture, texShape: [t, e]}`.
 */
function Fl(r,e,t,o,n,s){jI(e,t);let a=KI(r),i=r.TEXTURE_2D;return me(r,()=>r.bindTexture(i,a)),me(r,()=>r.texParameteri(i,r.TEXTURE_WRAP_S,r.CLAMP_TO_EDGE)),me(r,()=>r.texParameteri(i,r.TEXTURE_WRAP_T,r.CLAMP_TO_EDGE)),me(r,()=>r.texParameteri(i,r.TEXTURE_MIN_FILTER,r.NEAREST)),me(r,()=>r.texParameteri(i,r.TEXTURE_MAG_FILTER,r.NEAREST)),P().getNumber("WEBGL_VERSION")===1?me(r,()=>r.texImage2D(i,0,o,e,t,0,n,s,null)):me(r,()=>r.texStorage2D(i,1,o,e,t)),me(r,()=>r.bindTexture(r.TEXTURE_2D,null)),{texture:a,texShape:[t,e]}}
/** getInternalFormatForFloat32MatrixTexture: returns `r.internalFormatFloat`. */
function Zd(r){return r.internalFormatFloat}
/** createFloat32MatrixTexture: sizes the texture with $u(e, t) and creates it with type FLOAT. */
function cw(r,e,t,o){let[n,s]=$u(e,t);return Fl(r,n,s,Zd(o),o.textureFormatFloat,r.FLOAT)}
/** getInternalFormatForFloat16MatrixTexture: returns `r.internalFormatHalfFloat`. */
function Jd(r){return r.internalFormatHalfFloat}
/** createFloat16MatrixTexture: like cw but with the half-float internal format and type. */
function lw(r,e,t,o){let[n,s]=$u(e,t);return Fl(r,n,s,Jd(o),o.textureFormatFloat,o.textureTypeHalfFloat)}
/** getInternalFormatForUnsignedBytesMatrixTexture: returns `r.downloadTextureFormat`. */
function eh(r){return r.downloadTextureFormat}
/** createUnsignedBytesMatrixTexture: RGBA / UNSIGNED_BYTE texture sized by $u(e, t). */
function mw(r,e,t,o){let[n,s]=$u(e,t);return Fl(r,n,s,eh(o),r.RGBA,r.UNSIGNED_BYTE)}
/** getInternalFormatForPackedMatrixTexture: returns `r.internalFormatPackedFloat`. */
function th(r){return r.internalFormatPackedFloat}
/** createPackedMatrixTexture: RGBA / FLOAT texture sized by Ks(e, t). */
function fw(r,e,t,o){let[n,s]=Ks(e,t);return Fl(r,n,s,th(o),r.RGBA,r.FLOAT)}
/** getInternalFormatForFloat16PackedMatrixTexture: returns `r.internalFormatPackedHalfFloat`. */
function rh(r){return r.internalFormatPackedHalfFloat}
/** createFloat16PackedMatrixTexture: RGBA / half-float texture sized by Ks(e, t). */
function dw(r,e,t,o){let[n,s]=Ks(e,t);return Fl(r,n,s,rh(o),r.RGBA,o.textureTypeHalfFloat)}
/**
 * bindVertexProgramAttributeStreams: binds buffer `t` as ARRAY_BUFFER and
 * wires "clipSpacePos" (3, 20, 0) and "uv" (2, 20, 12) through Wd(); the
 * numbers are presumably component count, byte stride and byte offset
 * (confirm against Wd). Returns true only if both Wd() calls return truthy.
 */
function hw(r,e,t){return me(r,()=>r.bindBuffer(r.ARRAY_BUFFER,t)),Wd(r,e,"clipSpacePos",t,3,20,0)&&Wd(r,e,"uv",t,2,20,12)}
/**
 * uploadDenseMatrixToTexture: copies `n` into a fresh typed array of
 * t * o * 4 elements (Uint8Array if `n` is one, Float32Array otherwise) and
 * uploads it to texture `e`: texSubImage2D on WEBGL_VERSION 2, texImage2D otherwise.
 */
function gw(r,e,t,o,n,s){me(r,()=>r.bindTexture(r.TEXTURE_2D,e));let a,i,p;n instanceof Uint8Array?(a=new Uint8Array(t*o*4),i=r.UNSIGNED_BYTE,p=r.RGBA):(a=new Float32Array(t*o*4),i=r.FLOAT,p=s.internalFormatPackedFloat),a.set(n),P().getNumber("WEBGL_VERSION")===2?me(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,t,o,r.RGBA,i,a)):me(r,()=>r.texImage2D(r.TEXTURE_2D,0,p,t,o,0,r.RGBA,i,a)),me(r,()=>r.bindTexture(r.TEXTURE_2D,null))}
/**
 * uploadPixelDataToTexture: uploads `t` to texture `e` as RGBA / UNSIGNED_BYTE.
 * If `t.data` is a Uint8Array the explicit width/height overloads are used;
 * otherwise `t` itself is passed as the pixel source. texSubImage2D is used on
 * WEBGL_VERSION 2, texImage2D otherwise.
 */
function xw(r,e,t){me(r,()=>r.bindTexture(r.TEXTURE_2D,e)),t.data instanceof Uint8Array?P().getNumber("WEBGL_VERSION")===2?me(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,t.width,t.height,r.RGBA,r.UNSIGNED_BYTE,t.data)):me(r,()=>r.texImage2D(r.TEXTURE_2D,0,r.RGBA,t.width,t.height,0,r.RGBA,r.UNSIGNED_BYTE,t.data)):P().getNumber("WEBGL_VERSION")===2?me(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,r.RGBA,r.UNSIGNED_BYTE,t)):me(r,()=>r.texImage2D(r.TEXTURE_2D,0,r.RGBA,r.RGBA,r.UNSIGNED_BYTE,t)),me(r,()=>r.bindTexture(r.TEXTURE_2D,null))}
/**
 * createBufferFromOutputTexture: allocates a PIXEL_PACK_BUFFER of
 * 4 * 4 * e * t bytes (STREAM_READ) and issues readPixels(0, 0, t, e, RGBA,
 * FLOAT) into it at offset 0. Returns the buffer. Parameter `o` is unused.
 */
function yw(r,e,t,o){let n=r.createBuffer();me(r,()=>r.bindBuffer(r.PIXEL_PACK_BUFFER,n));let i=4*4*e*t;return me(r,()=>r.bufferData(r.PIXEL_PACK_BUFFER,i,r.STREAM_READ)),me(r,()=>r.readPixels(0,0,t,e,r.RGBA,r.FLOAT,0)),me(r,()=>r.bindBuffer(r.PIXEL_PACK_BUFFER,null)),n}
/** downloadFloat32MatrixFromBuffer: reads `t` floats from pixel-pack buffer `e` via getBufferSubData. */
function bw(r,e,t){let o=r,n=new Float32Array(t);return o.bindBuffer(o.PIXEL_PACK_BUFFER,e),o.getBufferSubData(o.PIXEL_PACK_BUFFER,0,n),o.bindBuffer(o.PIXEL_PACK_BUFFER,null),n}
/**
 * downloadByteEncodedFloatMatrixFromOutputTexture: reads the framebuffer as
 * UNSIGNED_BYTE into a Uint8Array sized by GE(e * t, 4) and returns a
 * Float32Array view over the same bytes.
 */
function Cw(r,e,t,o){let[n,s]=$u(e,t),a=4,i=new Uint8Array(GE(e*t,a));return me(r,()=>r.readPixels(0,0,n,s,o.downloadTextureFormat,r.UNSIGNED_BYTE,i)),new Float32Array(i.buffer)}
/**
 * downloadPackedMatrixFromBuffer: reads HE(s, a) floats from pixel-pack buffer
 * `e`. Parameters `t`, `o`, `n` and `i` are unused in this body.
 */
function Iw(r,e,t,o,n,s,a,i){let p=r,u=new Float32Array(HE(s,a));return p.bindBuffer(p.PIXEL_PACK_BUFFER,e),p.getBufferSubData(p.PIXEL_PACK_BUFFER,0,u),p.bindBuffer(p.PIXEL_PACK_BUFFER,null),u}
/** downloadMatrixFromPackedOutputTexture: readPixels(0, 0, t, e, RGBA, FLOAT) into a new Float32Array(e * t * 4). */
function ww(r,e,t){let o=new Float32Array(e*t*4);return me(r,()=>r.readPixels(0,0,t,e,r.RGBA,r.FLOAT,o)),o}var Fu=class{constructor(e){this.outputTexture=null,this.program=null,this.disposed=!1,this.vertexAttrsAreBound=!1,this.itemsToPoll=[];let t=P().getNumber("WEBGL_VERSION");e!=null?(this.gl=e,MI(t,e)):this.gl=Gr(t);let o="WEBGL_color_buffer_float",n="EXT_color_buffer_half_float";if(this.parallelCompilationExtension=this.gl.getExtension("KHR_parallel_shader_compile"),P().getNumber("WEBGL_VERSION")===1){let 
s="OES_texture_float",a="OES_texture_half_float";if(this.textureFloatExtension=nc(this.gl,s),Hr(this.gl,a))this.textureHalfFloatExtension=nc(this.gl,a);else if(P().get("WEBGL_FORCE_F16_TEXTURES"))throw new Error("GL context does not support half float textures, yet the environm
void main() {
setOutput(vec4(getA(), 0., 0., 0.));
}
`;else{let t=$t("rc",this.rank),o=_e(this.rank),n=this.getOutOfBoundsCondition(t),s=this.getSetup(t),a=this.getOutput(t);this.userCode=`
void main() {
${o} rc = getOutputCoords();
if(${n}) {
setOutput(vec4(0));
} else {
${s}
setOutput(vec4(${a}));
}
}
`}}getSourceCoordsArr(e){let t=[];for(let o=0;o<=1;o++)for(let n=0;n<=1;n++){let s=`${o===0?"r":"rp1"}, ${n===0?"c":"cp1"}`;for(let a=2;a<this.rank;a++)s=`${e[e.length-1-a]},`+s;t.push(s)}return t}getOutOfBoundsCondition(e){if(this.rank===1)return`rc > ${this.enableShapeUniforms?"outShape":this.outputShape[0]}`;let t="";for(let o=this.rank-2;o<this.rank;o++)t+=`${e[o]} >= ${this.enableShapeUniforms?`outShape[${o}]`:this.outputShape[o]}`,o<this.rank-1&&(t+="||");return t}getSetup(e){if(this.rank===1)return"";let t=e.slice(-2),o=this.enableShapeUniforms?`outShape[${this.rank} - 1]`:this.outputShape[this.rank-1],n=this.enableShapeUniforms?`outShape[${this.rank} - 2]`:this.outputShape[this.rank-2];return`
int r = ${t[0]};
int c = ${t[1]};
int rp1 = r + 1;
int cp1 = c + 1;
bool cEdge = cp1 >= ${o};
bool rEdge = rp1 >= ${n};
`}getOutput(e){let t=this.getSourceCoordsArr(e);return this.rank===1?`getA(rc), (rc + 1 >= ${this.enableShapeUniforms?"outShape":this.outputShape[0]} ? 0. : getA(rc + 1)), 0, 0`:`getA(${t[0]}),
cEdge ? 0. : getA(${t[1]}),
rEdge ? 0. : getA(${t[2]}),
rEdge || cEdge ? 0. : getA(${t[3]})`}};var hc=class{constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"inputShape",type:"ivec3"}],this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let o="";for(let n=0;n<4;n++){let s="thisRC = rc;";n%2===1&&(s+="thisRC.z += 1;"),n>1&&(s+="thisRC.y += 1;"),o+=`
${s}
${n>0?"if(thisRC.y < rows && thisRC.z < cols){":""}
int flatIndex = getFlatIndex(thisRC);
ivec3 inputRC = inputCoordsFromReshapedOutCoords(flatIndex);
vec2 inputRCInnerDims = vec2(float(inputRC.y),float(inputRC.z));
result[${n}] =
getChannel(getA(inputRC.x, inputRC.y, inputRC.z), inputRCInnerDims);
${n>0?"}":""}
`}this.userCode=`
${oY(t,this.enableShapeUniforms)}
${this.enableShapeUniforms?pc():uc(e)}
void main() {
ivec3 rc = getOutputCoords();
vec4 result = vec4(0.);
ivec3 thisRC;
int rows = ${this.enableShapeUniforms?"outShape[1]":e[1]};
int cols = ${this.enableShapeUniforms?"outShape[2]":e[2]};
${o}
setOutput(result);
}
`}};function oY(r,e){return`
ivec3 inputCoordsFromReshapedOutCoords(int index) {
${e?YE(["r","c","d"],"inputShape"):is(["r","c","d"],r)}
return ivec3(r, c, d);
}
`}var ih=class{constructor(e){this.gpgpu=e,this.numUsedTextures=0,this.numFreeTextures=0,this._numBytesAllocated=0,this._numBytesFree=0,this.freeTextures={},this.logEnabled=!1,this.usedTextures={}}acquireTexture(e,t,o){let n=Y$(t,o),s=Q$(e,n,o);s in this.freeTextures||(this.freeTextures[s]=[]),s in this.usedTextures||(this.usedTextures[s]=[]);let a=X$(e,n,this.gpgpu.gl,this.gpgpu.textureConfig,o);if(this.freeTextures[s].length>0){this.numFreeTextures--,this.numUsedTextures++,this._numBytesFree-=a,this.log();let p=this.freeTextures[s].shift();return this.usedTextures[s].push(p),p}let i;return n===Jt.PACKED_2X2_FLOAT32?i=this.gpgpu.createPackedMatrixTexture(e[0],e[1]):n===Jt.PACKED_2X2_FLOAT16?i=this.gpgpu.createFloat16PackedMatrixTexture(e[0],e[1]):n===Jt.UNPACKED_FLOAT32?i=this.gpgpu.createFloat32MatrixTexture(e[0],e[1]):n===Jt.UNPACKED_FLOAT16?i=this.gpgpu.createFloat16MatrixTexture(e[0],e[1]):n===Jt.PACKED_4X1_UNSIGNED_BYTE&&(i=this.gpgpu.createUnsignedBytesMatrixTexture(e[0],e[1])),this.usedTextures[s].push(i),this.numUsedTextures++,this._numBytesAllocated+=a,this.log(),i}releaseTexture(e,t,o,n){if(this.freeTextures==null)return;let s=Y$(o,n),a=Q$(t,s,n);a in this.freeTextures||(this.freeTextures[a]=[]);let i=X$(t,s,this.gpgpu.gl,this.gpgpu.textureConfig,n),p=P().get("WEBGL_DELETE_TEXTURE_THRESHOLD");p!==-1&&this._numBytesAllocated>p?(this.gpgpu.deleteMatrixTexture(e.texture),this._numBytesAllocated-=i):(this.freeTextures[a].push(e),this.numFreeTextures++,this._numBytesFree+=i),this.numUsedTextures--;let u=this.usedTextures[a],c=u.indexOf(e);if(c<0)throw new Error("Cannot release a texture that was never provided by this texture manager");u.splice(c,1),this.log()}log(){if(!this.logEnabled)return;let e=this.numFreeTextures+this.numUsedTextures;console.log("Free/Used",`${this.numFreeTextures} / ${this.numUsedTextures}`,`(${e})`);let t=this._numBytesFree/this._numBytesAllocated;console.log(`Bytes allocated: ${this._numBytesAllocated}`),console.log(`Bytes unused: 
${this._numBytesFree} (${Math.round(100*t)}%)`)}get numBytesAllocated(){return this._numBytesAllocated}get numBytesFree(){return this._numBytesFree}getNumUsedTextures(){return this.numUsedTextures}getNumFreeTextures(){return this.numFreeTextures}dispose(){if(this.freeTextures!=null){for(let e in this.freeTextures)this.freeTextures[e].forEach(t=>{this.gpgpu.deleteMatrixTexture(t.texture)});for(let e in this.usedTextures)this.usedTextures[e].forEach(t=>{this.gpgpu.deleteMatrixTexture(t.texture)});this.freeTextures=null,this.usedTextures=null,this.numUsedTextures=0,this.numFreeTextures=0,this._numBytesAllocated=0,this._numBytesFree=0}}};function nY(r,e){let t=r;if(e===t.R32F)return 4;if(e===t.R16F)return 2;if(e===t.RGBA32F)return 16;if(e===r.RGBA)return 16;if(e===t.RGBA16F)return 8;if(e===t.RGBA8)return 4;throw new Error(`Unknown internal format ${e}`)}function X$(r,e,t,o,n){let s=sY(e,o),a;if(n){let[p,u]=Ks(r[0],r[1]);a=p*u}else{let[p,u]=$u(r[0],r[1]);a=p*u}let i=nY(t,s);return a*i}function sY(r,e){switch(r){case Jt.PACKED_2X2_FLOAT32:return th(e);case Jt.PACKED_2X2_FLOAT16:return rh(e);case Jt.UNPACKED_FLOAT32:return Zd(e);case Jt.UNPACKED_FLOAT16:return Jd(e);case Jt.PACKED_4X1_UNSIGNED_BYTE:return eh(e);default:throw new Error(`Unknown physical texture type ${r}`)}}function aY(r){return P().getBool("WEBGL_RENDER_FLOAT32_ENABLED")?r?Jt.PACKED_2X2_FLOAT32:Jt.UNPACKED_FLOAT32:r?Jt.PACKED_2X2_FLOAT16:Jt.UNPACKED_FLOAT16}function Y$(r,e){if(r===ir.UPLOAD)return Jt.PACKED_2X2_FLOAT32;if(r===ir.RENDER||r==null)return aY(e);if(r===ir.DOWNLOAD||r===ir.PIXELS)return Jt.PACKED_4X1_UNSIGNED_BYTE;throw new Error(`Unknown logical texture type ${r}`)}function Q$(r,e,t){return`${r[0]}_${r[1]}_${e}_${t}`}var fr=class{constructor(e,t){this.variableNames=["A"],this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
float unaryOperation(float x) {
${t}
}
void main() {
float x = getAAtOutCoords();
float y = unaryOperation(x);
setOutput(y);
}
`}},/* Shared GLSL prefix: returns x unchanged when x is NaN. */Vt="if (isnan(x)) return x;",/* Identity snippet. */Z$="return x;",/* Absolute-value snippet. */kw="return abs(x);";/* Snippets below treat x as a scalar. J$: x for x >= 0, exp(x) - 1 otherwise. */var J$="return (x >= 0.0) ? x : (exp(x) - 1.0);",/* NaN guard, then clamp negatives to 0. */eR=Vt+`
return (x < 0.0) ? 0.0 : x;
`,/* NaN guard, then clamp to the range [0, 6]. */tR=Vt+`
return (x < 0.0) ? 0.0 : min(6.0, x);
`,/* Identity snippet. */Pu="return x;",/* Logistic function 1 / (1 + exp(-x)). */rR="return 1.0 / (1.0 + exp(-1.0 * x));";/* Snippets below read x component-wise (x.r, x.g, x.b, x.a). nR: identity. */var nR="return x;",/* Per component: x for x >= 0, exp(x) - 1 otherwise. */sR=`
vec4 result;
result.r = (x.r >= 0.0) ? x.r : (exp(x.r) - 1.0);
result.g = (x.g >= 0.0) ? x.g : (exp(x.g) - 1.0);
result.b = (x.b >= 0.0) ? x.b : (exp(x.b) - 1.0);
result.a = (x.a >= 0.0) ? x.a : (exp(x.a) - 1.0);
return result;
`,/* Per component: zero out negatives, but pass NaN components through. */aR=`
vec4 result = x * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,/* Per component: cap at 6, zero out negatives, pass NaN components through. */iR=`
vec4 result = min(x, vec4(6.)) * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,uR="return 1.0 / (1.0 + exp(-1.0 * x));",No=class{constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
vec4 unaryOperation(vec4 x) {
${t}
}
void main() {
vec4 x = getAAtOutCoords();
vec4 y = unaryOperation(x);
setOutput(y);
}
`}};var uh=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!1,this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let t=e.length,o=$t("rc",t),n=_e(t),s=j$(t,o),a=o.slice(-2),i=t<=1?"rc":`vec2(${a.join(",")})`;this.userCode=`
void main() {
${n} rc = getOutputCoords();
vec4 packedInput = getA(${s});
setOutput(getChannel(packedInput, ${i}));
}
`}};var uY=Bt.whereImpl,pY=1e-7,cY=1e-4,ph={};function lY(r){return r in ph||(ph[r]={}),ph[r]}var mY=P().getNumber("CPU_HANDOFF_SIZE_THRESHOLD"),fY=600;function dY(){return P().global.screen==null?1024:P().global.screen.height*P().global.screen.width*window.devicePixelRatio*fY/1024/1024}var Ni=class extends Jr{constructor(e){if(super(),this.pendingRead=new WeakMap,this.pendingDisposal=new WeakSet,this.dataRefCount=new WeakMap,this.numBytesInGPU=0,this.uploadWaitMs=0,this.downloadWaitMs=0,this.lastGlFlushTime=0,this.warnedAboutMemory=!1,this.pendingDeletes=0,this.disposed=!1,!P().getBool("HAS_WEBGL"))throw new Error("WebGL is not supported on this device");let t;if(e!=null){if(e instanceof Fu)t=e;else{let o=Gr(P().getNumber("WEBGL_VERSION"),e);t=new Fu(o)}this.binaryCache={},this.gpgpuCreatedLocally=!1}else{let o=Gr(P().getNumber("WEBGL_VERSION"));t=new Fu(o),this.binaryCache=lY(P().getNumber("WEBGL_VERSION")),this.gpgpuCreatedLocally=!0}this.gpgpu=t,this.canvas=this.gpgpu.gl.canvas,this.textureManager=new ih(this.gpgpu),this.numMBBeforeWarning=dY(),this.texData=new rn(this,cr())}nextDataId(){return Ni.nextDataId++}numDataIds(){return this.texData.numDataIds()-this.pendingDeletes}writeTexture(e,t,o,n,s,a){let i=this.makeTensorInfo(t,o),p=this.texData.get(i.dataId);p.isPacked=!1,p.texture={texture:e,texShape:[n,s]},p.texShape=[n,s];let u=ac(t),c=new Al(u,!1,a),l=this.runWebGLProgram(c,[i],o,[[n,s]]);return l.shape=t,p.texture=null,this.disposeIntermediateTensorInfo(i),l.dataId}write(e,t,o){if((P().getBool("WEBGL_CHECK_NUMERICAL_PROBLEMS")||P().getBool("DEBUG"))&&this.checkNumericalProblems(e),o==="complex64"&&e!=null)throw new Error("Cannot write to a complex64 dtype. 
Please use tf.complex(real, imag).");let n={id:this.nextDataId()};return this.texData.set(n,{shape:t,dtype:o,values:e,usage:ir.UPLOAD,refCount:1}),n}refCount(e){return this.texData.has(e)?this.texData.get(e).refCount:0}incRef(e){let t=this.texData.get(e);t.refCount++}decRef(e){if(this.texData.has(e)){let t=this.texData.get(e);t.refCount--}}move(e,t,o,n,s){if(P().getBool("DEBUG")&&this.checkNumericalProblems(t),n==="complex64")throw new Error("Cannot write to a complex64 dtype. Please use tf.complex(real, imag).");this.texData.set(e,{shape:o,dtype:n,values:t,usage:ir.UPLOAD,refCount:s})}disposeIntermediateTensorInfo(e){this.disposeData(e.dataId)}readSync(e){let t=this.texData.get(e),{values:o,dtype:n,complexTensorInfos:s,slice:a,shape:i,isPacked:p}=t;if(a!=null){let m;p?m=new No(i,Pu):m=new fr(i,Pu);let f=this.runWebGLProgram(m,[{dataId:e,shape:i,dtype:n}],n),d=this.readSync(f.dataId);return this.disposeIntermediateTensorInfo(f),d}if(o!=null)return this.convertAndCacheOnCPU(e);if(n==="string")return o;let u=this.activeTimers!=null,c;u&&(c=x.now());let l;if(n==="complex64"){let m=this.readSync(s.real.dataId),f=this.readSync(s.imag.dataId);l=I.mergeRealAndImagArrays(m,f)}else l=this.getValuesFromTexture(e);return u&&(this.downloadWaitMs+=x.now()-c),this.convertAndCacheOnCPU(e,l)}async read(e){if(this.pendingRead.has(e)){let d=this.pendingRead.get(e);return new Promise(h=>d.push(h))}let t=this.texData.get(e),{values:o,shape:n,slice:s,dtype:a,complexTensorInfos:i,isPacked:p}=t;if(s!=null){let d;p?d=new No(n,Pu):d=new fr(n,Pu);let h=this.runWebGLProgram(d,[{dataId:e,shape:n,dtype:a}],a),g=this.read(h.dataId);return this.disposeIntermediateTensorInfo(h),g}if(o!=null)return this.convertAndCacheOnCPU(e);if(P().getBool("DEBUG")&&!P().getBool("WEBGL_DOWNLOAD_FLOAT_ENABLED")&&P().getNumber("WEBGL_VERSION")===2)throw new Error("tensor.data() with WEBGL_DOWNLOAD_FLOAT_ENABLED=false and WEBGL_VERSION=2 not yet supported.");let 
u=null,c;if(a!=="complex64"&&P().get("WEBGL_BUFFER_SUPPORTED")){c=this.decode(e);let d=this.texData.get(c.dataId);u=this.gpgpu.createBufferFromTexture(d.texture.texture,..._l(n))}this.pendingRead.set(e,[]),a!=="complex64"&&await this.gpgpu.createAndWaitForFence();let l;if(a==="complex64"){let d=await Promise.all([this.read(i.real.dataId),this.read(i.imag.dataId)]),h=d[0],g=d[1];l=I.mergeRealAndImagArrays(h,g)}else if(u==null)l=this.getValuesFromT
if (isnan(a)) return a;
if (isnan(b)) return b;
`;var _o=class{constructor(e,t,o){this.variableNames=["A","B"],this.outputShape=I.assertAndGetBroadcastShape(t,o),this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
float binaryOperation(float a, float b) {
${e}
}
void main() {
float a = getAAtOutCoords();
float b = getBAtOutCoords();
setOutput(binaryOperation(a, b));
}
`}};var js=`
result.r = isNaN.r ? NAN : result.r;
result.g = isNaN.g ? NAN : result.g;
result.b = isNaN.b ? NAN : result.b;
result.a = isNaN.a ? NAN : result.a;
`;var Ko=class{constructor(e,t,o,n=!1){this.variableNames=["A","B"],this.supportsBroadcasting=!0,this.packedInputs=!0,this.packedOutput=!0,this.outputShape=I.assertAndGetBroadcastShape(t,o);let s=this.outputShape.length;this.enableShapeUniforms=lt(s);let a="";if(n)if(s===0||x.sizeFromShape(this.outputShape)===1)a=`
result.y = 0.;
result.z = 0.;
result.w = 0.;
`;else if(a=`
${_e(s)} coords = getOutputCoords();
`,s===1)this.enableShapeUniforms?a+=`
result.y = (coords + 1) >= outShape ? 0. : result.y;
result.z = 0.;
result.w = 0.;
`:a+=`
result.y = (coords + 1) >= ${this.outputShape[0]} ? 0. : result.y;
result.z = 0.;
result.w = 0.;
`;else{let p=$t("coords",s);this.enableShapeUniforms?a+=`
bool nextRowOutOfBounds =
(${p[s-2]} + 1) >= outShape[${s} - 2];
bool nextColOutOfBounds =
(${p[s-1]} + 1) >= outShape[${s} - 1];
result.y = nextColOutOfBounds ? 0. : result.y;
result.z = nextRowOutOfBounds ? 0. : result.z;
result.w = nextColOutOfBounds || nextRowOutOfBounds ? 0. : result.w;
`:a+=`
bool nextRowOutOfBounds =
(${p[s-2]} + 1) >= ${this.outputShape[s-2]};
bool nextColOutOfBounds =
(${p[s-1]} + 1) >= ${this.outputShape[s-1]};
result.y = nextColOutOfBounds ? 0. : result.y;
result.z = nextRowOutOfBounds ? 0. : result.z;
result.w = nextColOutOfBounds || nextRowOutOfBounds ? 0. : result.w;
`}this.userCode=`
vec4 binaryOperation(vec4 a, vec4 b) {
${e}
}
void main() {
vec4 a = getAAtOutCoords();
vec4 b = getBAtOutCoords();
vec4 result = binaryOperation(a, b);
${a}
setOutput(result);
}
`}};function Rt(r){let{inputs:e,backend:t}=r,{x:o}=e;return t.incRef(o.dataId),{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}var cR={kernelName:uo,backendName:"webgl",kernelFunc:Rt};function Ar(r){let{inputs:e,backend:t}=r,{real:o,imag:n}=e,s=t.makeTensorInfo(o.shape,"complex64"),a=t.texData.get(s.dataId),i=Rt({inputs:{x:o},backend:t}),p=Rt({inputs:{x:n},backend:t});return a.complexTensorInfos={real:i,imag:p},s}var lR={kernelName:aa,backendName:"webgl",kernelFunc:Ar};var Tw="return (a < 0.) ? b * a : a;",Nw=`
vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
`;function xY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{alpha:s}=o,a=t.makeTensorInfo([],"float32",x.createScalarValue(s,"float32")),i=P().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new Ko(Nw,n.shape,a.shape):new _o(Tw,n.shape,a.shape),p=t.runWebGLProgram(i,[n,a],"float32");return t.disposeIntermediateTensorInfo(a),p}var mR={kernelName:Nn,backendName:"webgl",kernelFunc:xY};var _w="return (a < 0.) ? b * a : a;",Ew=`
vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
`;function yY(r){let{inputs:e,backend:t}=r,{x:o,alpha:n}=e,s=P().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new Ko(Ew,o.shape,n.shape):new _o(_w,o.shape,n.shape);return t.runWebGLProgram(s,[o,n],"float32")}var fR={kernelName:Vn,backendName:"webgl",kernelFunc:yY};var jo="if (isnan(x)) return x;";function he({opSnippet:r,packedOpSnippet:e,cpuKernelImpl:t,dtype:o}){return({inputs:n,backend:s})=>{let{x:a}=n,i=s,p=o||a.dtype;if(i.shouldExecuteOnCPU([a])&&t!=null){let l=i.texData.get(a.dataId),m=t(l.values,p);return i.makeTensorInfo(a.shape,p,m)}let u=P().getBool("WEBGL_PACK_UNARY_OPERATIONS")&&e!=null,c;return u?c=new No(a.shape,e):c=new fr(a.shape,r),i.runWebGLProgram(c,[a],p)}}function ot({opSnippet:r,packedOpSnippet:e,checkOutOfBounds:t=!1,supportsComplex:o=!1,cpuKernelImpl:n,dtype:s}){return({inputs:a,backend:i})=>{let{a:p,b:u}=a,c=i;if(o&&p.dtype==="complex64"){let d=c.texData.get(p.dataId),h=c.texData.get(u.dataId),[g,y]=[[d.complexTensorInfos.real,h.complexTensorInfos.real],[d.complexTensorInfos.imag,h.complexTensorInfos.imag]].map(C=>{let[w,k]=C,_={dataId:w.dataId,dtype:w.dtype,shape:p.shape},E={dataId:k.dataId,dtype:k.dtype,shape:u.shape},R=new _o(r,p.shape,u.shape);return c.runWebGLProgram(R,[_,E],ct(w.dtype,k.dtype))}),b=Ar({inputs:{real:g,imag:y},backend:c});return c.disposeIntermediateTensorInfo(g),c.disposeIntermediateTensorInfo(y),b}let l=s||ct(p.dtype,u.dtype);if((p.dtype==="string"||u.dtype==="string"||c.shouldExecuteOnCPU([p,u]))&&n!=null){let d=c.texData.get(p.dataId).values,h=c.texData.get(u.dataId).values,g=p.dtype==="string"?I.fromUint8ToStringArray(d):d,y=p.dtype==="string"?I.fromUint8ToStringArray(h):h,[b,C]=n(p.shape,u.shape,g,y,l),w=c.makeTensorInfo(C,l),k=c.texData.get(w.dataId);return k.values=b,w}let m=P().getBool("WEBGL_PACK_BINARY_OPERATIONS")&&e!=null,f;return m?f=new Ko(e,p.shape,u.shape,t):f=new _o(r,p.shape,u.shape),c.runWebGLProgram(f,[p,u],l)}}function Ma(r,e=!1){if(r==="linear")return e?nR:Z$;if(r==="relu")return 
e?aR:eR;if(r==="elu")return e?sR:J$;if(r==="relu6")return e?iR:tR;if(r==="prelu")return e?Ew:_w;if(r==="leakyrelu")return e?Nw:Tw;if(r==="sigmoid")return e?uR:rR;throw new Error(`Activation ${r} has not been implemented for the WebGL backend.`)}var xc=class{constructor(e,t,o,n=!1,s=!1,a=!1,i=null,p=!1,u=!1){this.variableNames=["matrixA","matrixB"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=o,this.enableShapeUniforms=lt(this.outputShape.length);let c=n?e[1]:e[2],l=Math.ceil(c/2),m=n?"i * 2, rc.y":"rc.y, i * 2",f=s?"rc.z, i * 2":"i * 2, rc.z",d=n?["a.xxyy","a.zzww"]:["a.xxzz","a.yyww"],h=s?["b.xzxz","b.ywyw"]:["b.xyxy","b.zwzw"],g="",y="";i&&(p?g=`vec4 activation(vec4 a) {
vec4 b = getPreluActivationWeightsAtOutCoords();
${i}
}`:u?g=`vec4 activation(vec4 a) {
vec4 b = getLeakyreluAlphaAtOutCoords();
${i}
}`:g=`vec4 activation(vec4 x) {
${i}
}`,y="result = activation(result);");let b=a?"result += getBiasAtOutCoords();":"";a&&this.variableNames.push("bias"),p&&this.variableNames.push("preluActivationWeights"),u&&this.variableNames.push("leakyreluAlpha");let C="rc.x",w="rc.x";e[0]<t[0]?C=`int(min(float(rc.x), ${e[0]-1}.))`:t[0]<e[0]&&(w=`int(min(float(rc.x), ${t[0]-1}.))`),this.userCode=`
${g}
// Don't use uniform for sharedDimensionPacked for performance.
const float sharedDimension = ${l}.0;
vec4 dot2x2ARowBCol(ivec3 rc) {
vec4 result = vec4(0);
for (int i = 0; i < ${l}; i++) {
int batchA = ${C};
int batchB = ${w};
vec4 a = getMatrixA(batchA, ${m});
vec4 b = getMatrixB(batchB, ${f});
// These swizzled products need to be separately added.
// See: https://github.com/tensorflow/tfjs/issues/1735
result += (${d[0]} * ${h[0]});
result += (${d[1]} * ${h[1]});
}
return result;
}
void main() {
ivec3 rc = getOutputCoords();
vec4 result = dot2x2ARowBCol(rc);
${b}
${y}
setOutput(result);
}
`}};var $w={REAL:"return areal * breal - aimag * bimag;",IMAG:"return areal * bimag + aimag * breal;"},Dl=class{constructor(e,t,o){this.variableNames=["AReal","AImag","BReal","BImag"],this.outputShape=I.assertAndGetBroadcastShape(t,o),this.userCode=`
float binaryOpComplex(
float areal, float aimag, float breal, float bimag) {
${e}
}
void main() {
float areal = getARealAtOutCoords();
float aimag = getAImagAtOutCoords();
float breal = getBRealAtOutCoords();
float bimag = getBImagAtOutCoords();
setOutput(binaryOpComplex(areal, aimag, breal, bimag));
}
`}};var dR="return a * b;";function Pl(r){let{inputs:e,backend:t}=r,{a:o,b:n}=e,s=I.upcastType(o.dtype,n.dtype);if(o.dtype==="complex64"){let i=t.texData.get(o.dataId),p=t.texData.get(n.dataId),u=new Dl($w.REAL,o.shape,n.shape),c=new Dl($w.IMAG,o.shape,n.shape),l=[{dataId:i.complexTensorInfos.real.dataId,dtype:i.complexTensorInfos.real.dtype,shape:o.shape},{dataId:i.complexTensorInfos.imag.dataId,dtype:i.complexTensorInfos.imag.dtype,shape:o.shape},{dataId:p.complexTensorInfos.real.dataId,dtype:p.complexTensorInfos.real.dtype,shape:n.shape},{dataId:p.complexTensorInfos.imag.dataId,dtype:p.complexTensorInfos.imag.dtype,shape:n.shape}],m=t.runWebGLProgram(u,l,"float32"),f=t.runWebGLProgram(c,l,"float32"),d=Ar({inputs:{real:m,imag:f},backend:t});return t.disposeIntermediateTensorInfo(m),t.disposeIntermediateTensorInfo(f),d}if(t.shouldExecuteOnCPU([o,n])){let i=t.texData.get(o.dataId),p=t.texData.get(n.dataId),[u,c]=k$(o.shape,n.shape,i.values,p.values,s),l=t.makeTensorInfo(c,s),m=t.texData.get(l.dataId);return m.values=u,l}let a;return P().getBool("WEBGL_PACK_BINARY_OPERATIONS")?a=new Ko(dR,o.shape,n.shape):a=new _o(dR,o.shape,n.shape),t.runWebGLProgram(a,[o,n],s)}var hR={kernelName:ho,backendName:"webgl",kernelFunc:Pl};function gR(r,e,t){let o=[Pa(r.shape),...Oa(r.shape)],n={dtype:r.dtype,shape:o,dataId:r.dataId},s=[Pa(e),...Oa(e)],a=new hc(s,o),i=!0,p=[o],u=t.runWebGLProgram(a,[n],r.dtype,p,i);return{dataId:u.dataId,shape:e,dtype:u.dtype}}function J(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{shape:s}=o,a=t,i=x.sizeFromShape(n.shape),p=x.inferFromImplicitShape(s,i),u=x.sizeFromShape(p);x.assert(i===u,()=>`The new shape (${p}) has ${u} elements and the old shape (${n.shape}) has ${i} elements. 
The new shape and old shape must have the same number of elements.`);let c=a.texData.get(n.dataId);return c.isPacked&&!Ti(n.shape,p)&&!(c.texture!==null&&Ti(c.shape,p))?gR(n,p,a):(a.incRef(n.dataId),{dataId:n.dataId,shape:p,dtype:n.dtype})}var xR={kernelName:Ss,backendName:"webgl",kernelFunc:J};var Ol=class{constructor(e,t){this.variableNames=["x"];let{windowSize:o,batchSize:n,inSize:s,outSize:a}=e;this.outputShape=[n,a];let i=Math.floor(o/4)*4,p=o%4,u="sumValue += dot(values, ones);";if(t!=null){let l=1/t;u=`sumValue += dot(values * ${x.isInt(l)?l.toPrecision(2):l}, ones);`}let c="";s%o>0&&(c=`
if (inIdx < 0 || inIdx >= ${s}) {
return 0.0;
}
`),this.userCode=`
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float getValue(int batch, int inIdx) {
${c}
return getX(batch, inIdx);
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${o};
float sumValue = 0.0;
for (int i = 0; i < ${i}; i += 4) {
int inIdx = inOffset + i;
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
getValue(batch, inIdx + 3)
);
${u}
}
int inIdx = inOffset + ${i};
if (${p===1}) {
vec4 values = vec4(getValue(batch, inIdx), 0.0, 0.0, 0.0);
${u}
} else if (${p===2}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1), 0.0, 0.0);
${u}
} else if (${p===3}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2), 0.0);
${u}
}
setOutput(sumValue);
}
`}};var ch=class{constructor(e,t){this.variableNames=["x"];let{windowSize:o,batchSize:n,inSize:s,outSize:a}=e;this.outputShape=[n,a];let i="0.0",p="";t==="prod"?i="1.0":t==="min"?(i="1.0 / 1e-20",p="min"):t==="max"&&(i="-1.0 / 1e-20",p="max");let u=`${t}(${t}(${t}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;t==="sum"?u="sumValue":t==="prod"?u="prodValue":t==="all"?u="allValue":t==="any"&&(u="anyValue");let c=Math.floor(o/4)*4,l=o%4,m=`
if (${t==="sum"}) {
sumValue += dot(values, ones);
} else if (${t==="prod"}) {
vec2 tmp = vec2(values[0], values[1]) * vec2(values[2], values[3]);
prodValue *= tmp[0] * tmp[1];
} else {
minMaxValue = ${p}(values, minMaxValue);
if (${t==="min"} || ${t==="max"}) {
minMaxValue = ${p}(values, minMaxValue);
bvec4 isNaN = isnan(values);
if (isNaN.r || isNaN.g || isNaN.b || isNaN.a) {
minMaxValue = vec4(NAN);
}
}
}
`,f="vec4";t==="all"?(i="1.0",m=`
bool reducedAllValue = all(values);
float floatedReducedAllValue = float(reducedAllValue);
allValue = float(allValue >= 1.0 && floatedReducedAllValue >= 1.0);
`,f="bvec4"):t==="any"&&(i="0.0",m=`
bool reducedAnyValue = any(values);
float floatedReducedAnyValue = float(reducedAnyValue);
anyValue = float(anyValue >= 1.0 || floatedReducedAnyValue >= 1.0);
`,f="bvec4");let d="";s%o>0&&(d=`
if (inIdx < 0 || inIdx >= ${s}) {
return initializationValue;
}
`),this.userCode=`
const float initializationValue = ${i};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float getValue(int batch, int inIdx) {
${d}
return getX(batch, inIdx);
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${o};
vec4 minMaxValue = vec4(${i});
float prodValue = 1.0;
float sumValue = 0.0;
float allValue = 1.0;
float anyValue = 0.0;
for (int i = 0; i < ${c}; i += 4) {
int inIdx = inOffset + i;
${f} values = ${f}(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
getValue(batch, inIdx + 3)
);
${m}
}
int inIdx = inOffset + ${c};
if (${l===1}) {
${f} values = ${f}(
getValue(batch, inIdx),
initializationValue,
initializationValue,
initializationValue
);
${m}
} else if (${l===2}) {
${f} values = ${f}(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
initializationValue,
initializationValue
);
${m}
} else if (${l===3}) {
${f} values = ${f}(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
initializationValue
);
${m}
}
setOutput(${u});
}
`}};function CY(r){let e=[];for(;e.length===0||e[e.length-1].outSize!==1;){let t=e.length?e[e.length-1].outSize:r[1],o=I.computeOptimalWindowSize(t);e.push({inSize:t,windowSize:o,outSize:Math.ceil(t/o)})}return e}function qr(r,e,t,o){let n=CY(r.shape),s=r;for(let a=0;a<n.length;a++){let{inSize:i,windowSize:p,outSize:u}=n[a],c,l;t==="mean"?c=a===0?new Ol({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u},i):new Ol({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u}):c=new ch({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u},t),l=s,s=o.runWebGLProgram(c,[s],e),l.dataId!==r.dataId&&o.disposeIntermediateTensorInfo(l)}return s}var lh=class{constructor(e,t){this.variableNames=["A"];let o=new Array(e.length);for(let a=0;a<o.length;a++)o[a]=e[t[a]];this.outputShape=o,this.rank=o.length;let n=_e(this.rank),s=IY(t);this.userCode=`
void main() {
${n} resRC = getOutputCoords();
setOutput(getA(${s}));
}
`}};function IY(r){let e=r.length;if(e>6)throw Error(`Transpose for rank ${e} is not yet supported`);let t=["resRC.x","resRC.y","resRC.z","resRC.w","resRC.u","resRC.v"],o=new Array(e);for(let n=0;n<r.length;n++)o[r[n]]=t[n];return o.join()}var mh=class{constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0;let o=new Array(e.length);for(let c=0;c<o.length;c++)o[c]=e[t[c]];if(this.outputShape=o,this.rank=o.length,this.rank>6)throw Error(`Packed transpose for rank ${this.rank} is not yet supported.`);let n=_e(this.rank),s=vw("rc",this.rank),a=new Array(this.rank);for(let c=0;c<t.length;c++)a[t[c]]=s[c];let i=`vec2(${a.slice(-2).join()})`,p=`++${s[this.rank-1]} < ${o[this.rank-1]}`,u=`getChannel(getA(${a.join()}), ${i})`;this.userCode=`
void main() {
${n} rc = getOutputCoords();
vec4 result = vec4(0.);
result[0] = ${u};
if(${p}) {
result[1] = ${u};
}
--${s[this.rank-1]};
if(++${s[this.rank-2]} < ${o[this.rank-2]}) {
result[2] = ${u};
if(${p}) {
result[3] = ${u};
}
}
setOutput(result);
}
`}};function _i(r,e,t){let o=P().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new mh(r.shape,e):new lh(r.shape,e);return t.runWebGLProgram(o,[r],r.dtype)}function yR(r,e,t,o){let n=e,s=r.shape.length,a=x.parseAxisParam(n,r.shape),i=a,p=I.getAxesPermutation(i,s),u=p!=null,c=r;u&&(c=_i(r,p,o),i=I.getInnerMostAxes(i.length,s)),I.assertAxesAreInnerMostDims("sum",i,s);let[l,m]=I.computeOutAndReduceShapes(c.shape,i),f=l;t&&(f=I.expandShapeToKeepDim(l,a));let d=x.sizeFromShape(m),g=x.sizeFromShape(r.shape)/d,y=J({inputs:{x:c},attrs:{shape:[g,d]},backend:o}),b=Ca(r.dtype),C=qr(y,b,"sum",o),w=J({inputs:{x:C},attrs:{shape:f},backend:o});return o.disposeIntermediateTensorInfo(y),o.disposeIntermediateTensorInfo(C),u&&o.disposeIntermediateTensorInfo(c),w}function Ou(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return yR(n,s,a,t)}var bR={kernelName:jn,backendName:"webgl",kernelFunc:Ou};function xt(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{perm:s}=o,a=t,i=n.shape.length,p=new Array(i);for(let c=0;c<p.length;c++)p[c]=n.shape[s[c]];let u;if(a.shouldExecuteOnCPU([n])){let l=a.texData.get(n.dataId).values,m=Du(l,n.shape,n.dtype,s,p);u=a.makeTensorInfo(p,n.dtype);let f=a.texData.get(u.dataId);f.values=m}else u=_i(n,s,a);return u}var CR={kernelName:Mr,backendName:"webgl",kernelFunc:xt};var Rw=1e3;function Mu({a:r,b:e,transposeA:t,transposeB:o,backend:n,bias:s=null,preluActivationWeights:a=null,leakyreluAlpha:i=0,activation:p=null}){let u=r.shape.length,c=e.shape.length,l=t?r.shape[u-2]:r.shape[u-1],m=o?e.shape[c-1]:e.shape[c-2],f=t?r.shape[u-1]:r.shape[u-2],d=o?e.shape[c-2]:e.shape[c-1],h=r.shape.slice(0,-2),g=e.shape.slice(0,-2),y=x.sizeFromShape(h),b=x.sizeFromShape(g),w=br.assertAndGetBroadcastShape(r.shape.slice(0,-2),e.shape.slice(0,-2)).concat([f,d]);x.assert(l===m,()=>`Error in matMul: inner shapes (${l}) and (${m}) of Tensors with shapes ${r.shape} and ${e.shape} and transposeA=${t} and transposeB=${o} must match.`);let 
k=t?[y,l,f]:[y,f,l],_=o?[b,d,m]:[b,m,d],E=J({inputs:{x:r},backend:n,attrs:{shape:k}}),R=J({inputs:{x:e},backend:n,attrs:{shape:_}}),A=[E,R],D=Math.max(y,b),O=t?E.shape[1]:E.shape[2],M=s!=null,L=a!=null,W=p==="leakyrelu",V=p!=null?Ma(p,!0):null,G=M||L||W||V!=null,q;if((f===1||d===1)&&O>Rw&&G===!1){let j=E,Y=R;t&&(j=xt({inputs:{x:E},backend:n,attrs:{perm:[0,2,1]}}),A.push(j)),o&&(Y=xt({inputs:{x:R},backend:n,attrs:{perm:[0,2,1]}}),A.push(Y));let Z=d!==1,ee=d===1,X=j;Z&&(X=J({inputs:{x:j},backend:n,attrs:{shape:[D,O,1]}}),A.push(X));let Q=d===1?2:1,se=Y;ee&&(se=J({inputs:{x:Y},backend:n,attrs:{shape:[D,1,O]}}),A.push(se));let ie=Pl({inputs:{a:X,b:se},backend:n});q=Ou({inputs:{x:ie},backend:n,attrs:{axis:Q,keepDims:!0}}),A.push(ie)}else{let j=ct(r.dtype,e.dtype),Y=new xc(k,_,[D,f,d],t,o,M,V,L,W),Z=[E,R];if(s!=null&&Z.push(s),L&&Z.push(a),W){let ee=n.makeTensorInfo([],"float32",x.createScalarValue(i,"float32"));Z.push(ee),A.push(ee)}q=n.runWebGLProgram(Y,Z,j)}let H=J({inputs:{x:q},backend:n,attrs:{shape:w}});A.push(q);for(let j of A)n.disposeIntermediateTensorInfo(j);return H}function wY(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=e,{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o;return Mu({a:n,b:s,transposeA:p,transposeB:u,backend:t,bias:a,preluActivationWeights:i,leakyreluAlpha:l,activation:c})}var IR={kernelName:Fo,backendName:"webgl",kernelFunc:wY};var wR="return abs(x);";function SY(r){let{inputs:e,backend:t}=r,{x:o}=e;if(t.shouldExecuteOnCPU([o])&&o.dtype!=="complex64"){let s=t.texData.get(o.dataId),a=nh(s.values);return t.makeTensorInfo(o.shape,o.dtype,a)}let n;return P().getBool("WEBGL_PACK_UNARY_OPERATIONS")?n=new No(o.shape,wR):n=new fr(o.shape,wR),t.runWebGLProgram(n,[o],o.dtype)}var SR={kernelName:sn,backendName:"webgl",kernelFunc:SY};var vY=Vt+`
if (abs(x) > 1.) {
return NAN;
}
return acos(x);
`,kY=he({opSnippet:vY}),vR={kernelName:Li,backendName:"webgl",kernelFunc:kY};var TY=Vt+`
if (x < 1.0) return NAN;
return log(x + sqrt(x * x - 1.0));`,NY=he({opSnippet:TY}),kR={kernelName:Bi,backendName:"webgl",kernelFunc:NY};var TR="return a + b;",_Y=ot({opSnippet:TR,packedOpSnippet:TR,supportsComplex:!0,cpuKernelImpl:s$}),NR={kernelName:_r,backendName:"webgl",kernelFunc:_Y};var fh=class{constructor(e,t){this.outputShape=[],this.outputShape=e,this.variableNames=t.map((s,a)=>`T${a}`);let o=[];this.variableNames.forEach(s=>{o.push(`float v${s} = get${s}AtOutCoords();`)});let n=this.variableNames.map(s=>`v${s}`).join(" + ");this.userCode=`
void main() {
${o.join(`
`)}
float result = ${n};
setOutput(result);
}
`}};var dh=class{constructor(e,t){this.outputShape=[],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=e,this.variableNames=t.map((s,a)=>`T${a}`);let o=[];this.variableNames.forEach(s=>{o.push(`vec4 v${s} = get${s}AtOutCoords();`)});let n=this.variableNames.map(s=>`v${s}`).join(" + ");this.userCode=`
void main() {
${o.join(`
`)}
vec4 result = ${n};
setOutput(result);
}
`}};function hh(r){let{inputs:e,backend:t}=r,o=e;if(o.length===1)return Rt({inputs:{x:o[0]},backend:t});if(o.length>P().get("WEBGL_MAX_TEXTURES_IN_SHADER")){let p=Math.floor(o.length/2),u=hh({inputs:o.slice(0,p),backend:t}),c=hh({inputs:o.slice(p),backend:t});return hh({inputs:[u,c],backend:t})}let n=o.map(p=>p.dtype).reduce((p,u)=>ct(p,u)),s=o.map(p=>p.shape),i=P().getBool("WEBGL_PACK")?new dh(o[0].shape,s):new fh(o[0].shape,s);return t.runWebGLProgram(i,o,n)}var _R={kernelName:an,backendName:"webgl",kernelFunc:hh};function EY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o,i=n.shape.length,p=x.parseAxisParam(s,n.shape),u=p,c=I.getAxesPermutation(u,i),l=n;c!=null&&(l=xt({inputs:{x:n},backend:t,attrs:{perm:c}}),u=I.getInnerMostAxes(u.length,i)),I.assertAxesAreInnerMostDims("all",u,i);let[m,f]=I.computeOutAndReduceShapes(l.shape,u),d=x.sizeFromShape(f),h=J({inputs:{x:l},backend:t,attrs:{shape:[-1,d]}}),g=qr(h,h.dtype,"all",t),y;if(a){let b=I.expandShapeToKeepDim(m,p);y=J({inputs:{x:g},backend:t,attrs:{shape:b}})}else y=J({inputs:{x:g},backend:t,attrs:{shape:m}});return t.disposeIntermediateTensorInfo(h),t.disposeIntermediateTensorInfo(g),c!=null&&t.disposeIntermediateTensorInfo(l),y}var ER={kernelName:oa,backendName:"webgl",kernelFunc:EY};function $Y(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o,i=n.shape.length,p=x.parseAxisParam(s,n.shape),u=p,c=I.getAxesPermutation(u,i),l=n;c!=null&&(l=xt({inputs:{x:n},backend:t,attrs:{perm:c}}),u=I.getInnerMostAxes(u.length,i)),I.assertAxesAreInnerMostDims("any",u,i);let[m,f]=I.computeOutAndReduceShapes(l.shape,u),d=x.sizeFromShape(f),h=J({inputs:{x:l},backend:t,attrs:{shape:[-1,d]}}),g=qr(h,h.dtype,"any",t),y;if(a){let b=I.expandShapeToKeepDim(m,p);y=J({inputs:{x:g},backend:t,attrs:{shape:b}})}else y=J({inputs:{x:g},backend:t,attrs:{shape:m}});return t.disposeIntermediateTensorInfo(h),t.disposeIntermediateTensorInfo(g),c!=null&&t.disposeIntermediateTensorInfo(l),y}var 
$R={kernelName:na,backendName:"webgl",kernelFunc:$Y};var gh=class{constructor(e,t,o){this.variableNames=["A"];let{windowSize:n,batchSize:s,outSize:a}=e;o||this.variableNames.push("bestIndicesA"),this.outputShape=[s,a];let i=t==="max"?">":"<",p=o?"inOffset + i;":"round(getBestIndicesA(batch, inOffset + i));";this.userCode=`
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${n};
int bestIndex = inOffset;
float bestValue = getA(batch, bestIndex);
for (int i = 0; i < ${n}; i++) {
int inIdx = ${p};
float candidate = getA(batch, inIdx);
if (candidate ${i} bestValue) {
bestValue = candidate;
bestIndex = inIdx;
}
}
setOutput(float(bestIndex));
}
`}};/**
 * Packed arg-min / arg-max shader program (packedInputs and packedOutput are
 * both set).
 *
 * Constructor arguments, as used below:
 *  - e: input shape; must have more than 2 dimensions (asserted).
 *  - t: window size; the last dimension is reduced to ceil(last / t), and that
 *       dimension is dropped from the output shape when the result is 1.
 *  - o: "max" selects the GLSL `greaterThan` comparison, otherwise `lessThan`;
 *       also used to build the assertion message ("Packed argMax"/"argMin").
 *  - n: first-pass flag. When falsy, a second input "bestIndicesA" is declared
 *       and candidate indices are read from it.
 *
 * The generated shader tracks four candidates at once (R, G, B, A lanes of a
 * texel) and skips NaN candidates when deciding whether to replace the best.
 */var xh=class{constructor(e,t,o,n){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,x.assert(e.length>2,()=>`Packed arg${o.charAt(0).toUpperCase()+o.slice(1)} supports only inputs with rank above 2.`);let s=e[e.length-1],a=Math.ceil(s/t);this.outputShape=e.slice(0,-1),a>1&&this.outputShape.push(a),n||this.variableNames.push("bestIndicesA");let i=this.outputShape,p=i.length,u=_e(p),c=$t("coords",p),l,m;if(a===1){m=p+1;let R=_e(m);l=`
${R} sourceLocR = ${R}(${c.join()}, 0);
++${c[p-1]};
${R} sourceLocG = ${R}(${c.join()}, 0);
++${c[p-2]};
${R} sourceLocA = ${R}(${c.join()}, 0);
--${c[p-1]};
${R} sourceLocB = ${R}(${c.join()}, 0);
--${c[p-2]};`}else m=p,l=`
${u} sourceLocR = coords;
++${c[p-1]};
${u} sourceLocG = coords;
++${c[p-2]};
${u} sourceLocA = coords;
--${c[p-1]};
${u} sourceLocB = coords;
--${c[p-2]};`;let f=["x","y","z","w","u","v"].slice(0,m),d="."+f[m-1],h=f.map(R=>"int "+R),g=$t("sourceLocR",m-1).concat("inIdx.r"),y=$t("sourceLocG",m-1).concat("inIdx.g"),b=$t("sourceLocB",m-1).concat("inIdx.b"),C=$t("sourceLocA",m-1).concat("inIdx.a"),w=o==="max"?"greaterThan":"lessThan",k=n?"":`
inIdx = round(vec4(getBestIndicesAChannel(${g.join()}),
getBestIndicesAChannel(${y.join()}),
getBestIndicesAChannel(${b.join()}),
getBestIndicesAChannel(${C.join()})));`,_=`vec4(
getAChannel(${g.join()}),
hasNextCol ? getAChannel(${y.join()}) : 0.,
hasNextRow ? getAChannel(${b.join()}) : 0.,
hasNextRow && hasNextCol ? getAChannel(${C.join()}) : 0.)`,E=n?"":`
float getBestIndicesAChannel(${h.join()}) {
return getChannel(getBestIndicesA(${f.join()}),
vec2(${f.slice(-2).join()}));
}`;this.userCode=`
float getAChannel(${h.join()}) {
return getChannel(getA(${f.join()}),
vec2(${f.slice(-2).join()}));
}
${E}
void main() {
${u} coords = getOutputCoords();
bool hasNextCol = ${c[p-1]} < ${i[p-1]-1};
bool hasNextRow = ${c[p-2]} < ${i[p-2]-1};
${l}
ivec4 srcIdx = ivec4(sourceLocR${d}, sourceLocG${d},
sourceLocB${d}, sourceLocA${d}) * ${t};
ivec4 inIdx = srcIdx;
vec4 bestIndex = vec4(inIdx);
vec4 bestValue = ${_};
for (int i = 0; i < ${t}; i++) {
inIdx = srcIdx;
${k}
vec4 candidate = ${_};
bvec4 nan = isnan(candidate);
bvec4 replace = bvec4(
vec4(${w}(candidate, bestValue)) * (vec4(1.0) - vec4(nan)));
bestValue = vec4(replace.x ? candidate.x : bestValue.x,
replace.y ? candidate.y : bestValue.y,
replace.z ? candidate.z : bestValue.z,
replace.w ? candidate.w : bestValue.w);
bestIndex = mix(bestIndex, vec4(inIdx), vec4(replace));
srcIdx++;
}
setOutput(bestIndex);
}
`}};function RR(r,e,t,o=null){let n=e.shape[0],s=e.shape[1];o!=null&&(n=o.shape[0],s=o.shape[1]);let a=I.computeOptimalWindowSize(s),i={windowSize:a,inSize:s,batchSize:n,outSize:Math.ceil(s/a)},p=new gh(i,t,o==null),u=[e];o!=null&&u.push(o);let c=r.runWebGLProgram(p,u,"int32");if(c.shape[1]===1)return c;let l=RR(r,e,t,c);return r.disposeIntermediateTensorInfo(c),l}function AR(r,e,t,o=null){let n=o!=null?o.shape:e.shape,s=n[n.length-1],a=I.computeOptimalWindowSize(s),i=new xh(n,a,t,o==null),p=o==null?[e]:[e,o],u=r.runWebGLProgram(i,p,"int32");if(u.shape.length===e.shape.length){let c=AR(r,e,t,u);return r.disposeIntermediateTensorInfo(u),c}return u}function yh(r,e,t,o){let n=[t];if(I.assertAxesAreInnerMostDims("arg"+o.charAt(0).toUpperCase()+o.slice(1),n,e.shape.length),!P().getBool("WEBGL_PACK_REDUCE")||e.shape.length<=2){let s=[],a=r.texData.get(e.dataId),i=a!==null&&a.isPacked,p=e;i&&(p=r.unpackTensor(e),s.push(p));let[u,c]=I.computeOutAndReduceShapes(p.shape,n),l=x.sizeFromShape(c),m=J({inputs:{x:p},backend:r,attrs:{shape:[-1,l]}});s.push(m);let f=RR(r,m,o);s.push(f);let d=J({inputs:{x:f},backend:r,attrs:{shape:u}});return s.forEach(h=>r.disposeIntermediateTensorInfo(h)),d}return AR(r,e,o)}function RY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=x.parseAxisParam(s,n.shape),i=I.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=xt({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=I.getInnerMostAxes(a.length,p.shape.length)),I.assertAxesAreInnerMostDims("argMax",[a[0]],p.shape.length);let c=yh(t,p,a[0],"max");return u.forEach(l=>t.disposeIntermediateTensorInfo(l)),c}var FR={kernelName:un,backendName:"webgl",kernelFunc:RY};function AY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=x.parseAxisParam(s,n.shape),i=I.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=xt({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=I.getInnerMostAxes(a.length,p.shape.length)),I.assertAxesAreInnerMostDims("argMin",[a[0]],p.shape.length);let 
c=yh(t,p,a[0],"min");return u.forEach(l=>t.disposeIntermediateTensorInfo(l)),c}var DR={kernelName:ja,backendName:"webgl",kernelFunc:AY};var FY=Vt+`
if (abs(x) > 1.) {
return NAN;
}
return asin(x);
`,DY=he({opSnippet:FY}),PR={kernelName:Vi,backendName:"webgl",kernelFunc:DY};var PY=Vt+"return log(x + sqrt(x * x + 1.0));",OY=he({opSnippet:PY}),OR={kernelName:zi,backendName:"webgl",kernelFunc:OY};var MY=Vt+`
return atan(x);
`,LY=he({opSnippet:MY}),MR={kernelName:Wi,backendName:"webgl",kernelFunc:LY};var BY=gc+`
return atan(a, b);
`,VY=`
vec4 result = atan(a, b);
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
`+js+`
return result;
`,zY=ot({opSnippet:BY,packedOpSnippet:VY}),LR={kernelName:sa,backendName:"webgl",kernelFunc:zY};var WY=Vt+`
if ((x < -1.0) || (x > 1.0)) return NAN;
return (log(1.0 + x) - log(1.0 - x)) / 2.0;`,UY=he({opSnippet:WY}),BR={kernelName:Ui,backendName:"webgl",kernelFunc:UY};/**
 * 2-D pooling shader program over input "x" (4-D output coordinates).
 *
 * Constructor arguments, as used below:
 *  - e: pooling info; reads filterWidth, stride/dilation height and width,
 *       effectiveFilterHeight/Width, padInfo.top/left, inHeight, inWidth,
 *       inChannels and outShape.
 *  - t: pool type. "avg" accumulates a sum and divides by a running count;
 *       any other value is interpolated as a GLSL function name to combine
 *       the four accumulated lanes.
 *  - o: when truthy, the shader outputs the POSITION of the extreme value
 *       instead of the value. Throws if combined with t === "avg".
 *  - n, s: only used in position mode. n falsy -> position inside the window
 *       (wR * effectiveFilterWidth + wC); n truthy -> flattened input index,
 *       including the batch term only when s is truthy.
 *
 * In value mode the filter width is processed four columns at a time, with a
 * remainder branch for widths not divisible by 4; out-of-range columns
 * contribute `initializationValue`.
 */var us=class{constructor(e,t,o,n=!1,s=!1){if(this.variableNames=["x"],t==="avg"&&o)throw new Error("Cannot compute positions for average pool.");let a=e.filterWidth,i=e.strideHeight,p=e.strideWidth,u=e.dilationHeight,c=e.dilationWidth,l=e.effectiveFilterHeight,m=e.effectiveFilterWidth,f=e.padInfo.top,d=e.padInfo.left;this.outputShape=e.outShape;let h=t==="avg",g=`((batch * ${e.inHeight} + xR) * ${e.inWidth} + xC) * ${e.inChannels} + d`,y=`(xR * ${e.inWidth} + xC) * ${e.inChannels} + d`,b="0.0";if(h||(b="-1.0 / 1e-20"),o){let R=">=";this.userCode=`
const ivec2 strides = ivec2(${i}, ${p});
const ivec2 pads = ivec2(${f}, ${d});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d = coords[3];
ivec2 xRCCorner = coords.yz * strides - pads;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// max/min x(?, ?, d) to get y(yR, yC, d).
// ? = to be determined
float minMaxValue = 0.0;
float minMaxValueFound = 0.0;
int minMaxPosition = 0;
float avgValue = 0.0;
for (int wR = 0; wR < ${l};
wR += ${u}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${m};
wC += ${c}) {
int xC = xCCorner + wC;
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float value = getX(batch, xR, xC, d);
// If a min / max value has already been found, use it. If not,
// use the current value.
float currMinMaxValue = mix(
value, minMaxValue, minMaxValueFound);
if (value ${R} currMinMaxValue) {
minMaxValue = value;
minMaxValueFound = 1.0;
minMaxPosition = ${n?s?g:y:`wR * ${m} + wC`};
}
}
}
setOutput(float(minMaxPosition));
}
`;return}let C="max",w=`${t}(${t}(${t}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;t==="avg"&&(w="avgValue / count");let k=Math.floor(a/4)*4,_=a%4,E=`
if (${h}) {
avgValue += dot(values, ones);
} else {
minMaxValue = ${C}(values, minMaxValue);
}
`;this.userCode=`
const ivec2 strides = ivec2(${i}, ${p});
const ivec2 pads = ivec2(${f}, ${d});
const float initializationValue = ${b};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float count = 0.0;
float getValue(int batch, int xR, int xC, int d) {
if (xC < 0 || xC >= ${e.inWidth}) {
return initializationValue;
}
count += 1.0;
return getX(batch, xR, xC, d);
}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d = coords[3];
ivec2 xRCCorner = coords.yz * strides - pads;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// max/min x(?, ?, d) to get y(yR, yC, d).
// ? = to be determined
vec4 minMaxValue = vec4(${b});
float avgValue = 0.0;
count = 0.0;
for (int wR = 0; wR < ${l};
wR += ${u}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${k}; wC += 4) {
int xC = xCCorner + wC * ${c};
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
getValue(batch, xR, xC + 2 * ${c}, d),
getValue(batch, xR, xC + 3 * ${c}, d)
);
${E}
}
int xC = xCCorner + ${k};
if (${_===1}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
initializationValue,
initializationValue,
initializationValue
);
${E}
} else if (${_===2}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
initializationValue,
initializationValue
);
${E}
} else if (${_===3}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
getValue(batch, xR, xC + 2 * ${c}, d),
initializationValue
);
${E}
}
}
setOutput(${w});
}
`}},/**
 * 3-D pooling shader program over input "x" (5-D output coordinates).
 *
 * Constructor arguments, as used below:
 *  - e: pooling info; reads filterWidth, stride/dilation depth, height and
 *       width, effectiveFilterDepth/Height/Width, padInfo.front/top/left,
 *       inDepth, inHeight, inWidth, inChannels and outShape.
 *  - t: pool type. "avg" accumulates a sum and divides by a running count;
 *       any other value is interpolated as a GLSL function name to combine
 *       the four accumulated lanes.
 *  - o: when truthy, the shader outputs the POSITION of the extreme value.
 *       Throws if combined with t === "avg".
 *  - n, s: only used in position mode. n falsy -> position inside the window;
 *       n truthy -> flattened input index, including the batch term only when
 *       s is truthy.
 *
 * NOTE(review): in value mode `setOutput` is emitted inside the outer depth
 * loop of the generated shader rather than after it - confirm this is intended.
 */Ei=class{constructor(e,t,o,n=!1,s=!1){if(this.variableNames=["x"],t==="avg"&&o)throw new Error("Cannot compute positions for average pool.");let a=e.filterWidth,i=e.strideDepth,p=e.strideHeight,u=e.strideWidth,c=e.dilationDepth,l=e.dilationHeight,m=e.dilationWidth,f=e.effectiveFilterDepth,d=e.effectiveFilterHeight,h=e.effectiveFilterWidth,g=e.padInfo.front,y=e.padInfo.top,b=e.padInfo.left;this.outputShape=e.outShape;let C=t==="avg",w="0.0";if(C||(w="-1.0 / 1e-20"),o){let D=">=";this.userCode=`
const ivec3 strides =
ivec3(${i}, ${p}, ${u});
const ivec3 pads = ivec3(${g}, ${y}, ${b});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 xCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xDCorner = xCorner.x;
int xRCorner = xCorner.y;
int xCCorner = xCorner.z;
// max/min x(?, ?, ?, ch) to get y(yD, yR, yC, ch).
// ? = to be determined
float minMaxValue = 0.0;
float minMaxValueFound = 0.0;
int minMaxPosition = 0;
for (int wD = 0; wD < ${f};
wD += ${c}) {
int xD = xDCorner + wD;
if (xD < 0 || xD >= ${e.inDepth}) {
continue;
}
for (int wR = 0; wR < ${d};
wR += ${l}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${h};
wC += ${m}) {
int xC = xCCorner + wC;
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float value = getX(batch, xD, xR, xC, ch);
// If a min / max value has already been found, use it. If not,
// use the current value.
float currMinMaxValue = mix(
value, minMaxValue, minMaxValueFound);
if (value ${D} currMinMaxValue) {
minMaxValue = value;
minMaxValueFound = 1.0;
minMaxPosition = ${n?s?`(((batch * ${e.inDepth} + xD) * ${e.inHeight} + xR) * ${e.inWidth} + xC) * ${e.inChannels} + ch`:`((xD * ${e.inHeight} + xR) * ${e.inWidth} + xC) * ${e.inChannels} + ch`:`wD * ${d} * ${h} +
wR * ${h} + wC`};
}
}
}
}
setOutput(float(minMaxPosition));
}
`;return}let k="max",_=`${t}(${t}(${t}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;t==="avg"&&(_="avgValue / count");let E=Math.floor(a/4)*4,R=a%4,A=`
if (${C}) {
avgValue += dot(values, ones);
} else {
minMaxValue = ${k}(values, minMaxValue);
}
`;this.userCode=`
const ivec3 strides =
ivec3(${i}, ${p}, ${u});
const ivec3 pads = ivec3(${g}, ${y}, ${b});
const float initializationValue = ${w};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float count = 0.0;
float getValue(int batch, int xD, int xR, int xC, int ch) {
if (xC < 0 || xC >= ${e.inWidth}) {
return initializationValue;
}
count += 1.0;
return getX(batch, xD, xR, xC, ch);
}
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 xCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xDCorner = xCorner.x;
int xRCorner = xCorner.y;
int xCCorner = xCorner.z;
// max/min x(?, ?, ?, d) to get y(yD, yR, yC, ch).
// ? = to be determined
vec4 minMaxValue = vec4(${w});
float avgValue = 0.0;
count = 0.0;
for (int wD = 0; wD < ${f};
wD += ${c}) {
int xD = xDCorner + wD;
if (xD < 0 || xD >= ${e.inDepth}) {
continue;
}
for (int wR = 0; wR < ${d};
wR += ${l}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${E}; wC += 4) {
int xC = xCCorner + wC * ${m};
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
getValue(batch, xD, xR, xC + 2 * ${m}, ch),
getValue(batch, xD, xR, xC + 3 * ${m}, ch)
);
${A}
}
int xC = xCCorner + ${E};
if (${R===1}) {
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
initializationValue,
initializationValue,
initializationValue
);
${A}
} else if (${R===2}) {
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
initializationValue,
initializationValue
);
${A}
} else if (${R===3}) {
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
getValue(batch, xD, xR, xC + 2 * ${m}, ch),
initializationValue
);
${A}
}
}
setOutput(${_});
}
}
`}};function GY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e;as(n,"avgPool");let{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1;x.assert(I.eitherStridesOrDilationsAreOne(a,u),()=>`Error in avgPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${u}'`);let c=I.computePool2DInfo(n.shape,s,a,u,i,p);if(c.filterWidth===1&&c.filterHeight===1&&x.arraysEqual(c.inShape,c.outShape))return Rt({inputs:{x:n},backend:t});let l=new us(c,"avg",!1);return t.runWebGLProgram(l,[n],"float32")}var VR={kernelName:pn,backendName:"webgl",kernelFunc:GY};function HY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{filterSize:s,strides:a,pad:i,dimRoundingMode:p,dataFormat:u}=o,c=[1,1,1],l=I.computePool3DInfo(n.shape,s,a,c,i,p,u),m=new Ei(l,"avg",!1);return t.runWebGLProgram(m,[n],"float32")}var zR={kernelName:ip,backendName:"webgl",kernelFunc:HY};var bh=class{constructor(e){this.variableNames=["dy"],this.outputShape=e.inShape;let t=e.filterHeight,o=e.filterWidth,n=e.strideHeight,s=e.strideWidth,a=e.dilationHeight,i=e.dilationWidth,p=e.effectiveFilterHeight,u=e.effectiveFilterWidth,c=p-1-e.padInfo.top,l=u-1-e.padInfo.left,m=1/(t*o);this.userCode=`
const ivec2 pads = ivec2(${c}, ${l});
const float avgMultiplier = float(${m});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 dyRCCorner = coords.yz - pads;
int dyRCorner = dyRCCorner.x;
int dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${p};
wR += ${a}) {
float dyR = float(dyRCorner + wR) / ${n}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${u};
wC+= ${i}) {
float dyC = float(dyCCorner + wC) / ${s}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(b, idyR, idyC, d);
dotProd += dyValue * avgMultiplier;
}
}
setOutput(dotProd);
}
`}},Ch=class{constructor(e){this.variableNames=["dy"],this.outputShape=e.inShape;let t=e.filterDepth,o=e.filterHeight,n=e.filterWidth,s=e.strideDepth,a=e.strideHeight,i=e.strideWidth,p=e.dilationDepth,u=e.dilationHeight,c=e.dilationWidth,l=e.effectiveFilterDepth,m=e.effectiveFilterHeight,f=e.effectiveFilterWidth,d=l-1-e.padInfo.front,h=m-1-e.padInfo.top,g=f-1-e.padInfo.left,y=1/(t*o*n);this.userCode=`
const ivec3 pads = ivec3(${d}, ${h}, ${g});
const float avgMultiplier = float(${y});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyDCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
// Convolve dy(?, ?, ?, d) with pos mask(:, :, :, ch) to get
// dx(xD, xR, xC, ch).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wD = 0; wD < ${l};
wD += ${p}) {
float dyD = float(dyDCorner + wD) / ${s}.0;
if (dyD < 0.0 || dyD >= ${e.outDepth}.0 || fract(dyD) > 0.0) {
continue;
}
int idyD = int(dyD);
for (int wR = 0; wR < ${m};
wR += ${u}) {
float dyR = float(dyRCorner + wR) / ${a}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 ||
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${f};
wC += ${c}) {
float dyC = float(dyCCorner + wC) / ${i}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(batch, idyD, idyR, idyC, ch);
dotProd += dyValue * avgMultiplier;
}
}
}
setOutput(dotProd);
}
`}};function qY(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s}=e,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=[1,1,1],m=I.computePool3DInfo(a.shape,i,p,l,u,c),f=new Ch(m);return t.runWebGLProgram(f,[n],a.dtype)}var WR={kernelName:Fm,backendName:"webgl",kernelFunc:qY};function KY(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s}=e,a=s;as([n,s],"avgPoolGrad");let{filterSize:i,strides:p,pad:u}=o,c=I.computePool2DInfo(a.shape,i,p,1,u),l=new bh(c);return t.runWebGLProgram(l,[n],a.dtype)}var UR={kernelName:Am,backendName:"webgl",kernelFunc:KY};function jY(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s}=e,{transposeA:a,transposeB:i}=o;return Mu({a:n,b:s,transposeA:a,transposeB:i,backend:t})}var GR={kernelName:cn,backendName:"webgl",kernelFunc:jY};var Ih=class{constructor(e,t,o,n,s,a){this.outputShape=[],this.variableNames=["x","mean","variance"],I.assertAndGetBroadcastShape(e,t),I.assertAndGetBroadcastShape(e,o);let i="0.0";n!=null&&(I.assertAndGetBroadcastShape(e,n),this.variableNames.push("offset"),i="getOffsetAtOutCoords()");let p="1.0";s!=null&&(I.assertAndGetBroadcastShape(e,s),this.variableNames.push("scale"),p="getScaleAtOutCoords()"),this.outputShape=e,this.userCode=`
void main() {
float x = getXAtOutCoords();
float mean = getMeanAtOutCoords();
float variance = getVarianceAtOutCoords();
float offset = ${i};
float scale = ${p};
float inv = scale * inversesqrt(variance + float(${a}));
setOutput(dot(vec3(x, -mean, offset), vec3(inv, inv, 1)));
}
`}};var wh=class{constructor(e,t,o,n,s,a){this.packedInputs=!0,this.packedOutput=!0,this.variableNames=["x","mean","variance"],I.assertAndGetBroadcastShape(e,t),I.assertAndGetBroadcastShape(e,o);let i="vec4(0.0)";n!=null&&(I.assertAndGetBroadcastShape(e,n),this.variableNames.push("offset"),i="getOffsetAtOutCoords()");let p="vec4(1.0)";s!=null&&(I.assertAndGetBroadcastShape(e,s),this.variableNames.push("scale"),p="getScaleAtOutCoords()"),this.outputShape=e,this.userCode=`
void main() {
vec4 offset = ${i};
vec4 scale = ${p};
vec4 x = getXAtOutCoords();
vec4 mean = getMeanAtOutCoords();
vec4 variance = getVarianceAtOutCoords();
vec4 inv = scale * inversesqrt(variance + vec4(${a}));
setOutput((x - mean) * inv + offset);
}
`}};var XY=({inputs:r,backend:e,attrs:t})=>{let{x:o,mean:n,variance:s,offset:a,scale:i}=r;x.assert(n.shape.length===s.shape.length,()=>"Batch normalization gradient requires mean and variance to have equal ranks."),x.assert(a==null||n.shape.length===a.shape.length,()=>"Batch normalization gradient requires mean and offset to have equal ranks."),x.assert(i==null||n.shape.length===i.shape.length,()=>"Batch normalization gradient requires mean and scale to have equal ranks.");let{varianceEpsilon:p}=t;p==null&&(p=.001);let u=[o,n,s],c=null;a!=null&&(c=a.shape,u.push(a));let l=null;i!=null&&(l=i.shape,u.push(i));let m=P().getBool("WEBGL_PACK_NORMALIZATION")?new wh(o.shape,n.shape,s.shape,c,l,p):new Ih(o.shape,n.shape,s.shape,c,l,p);return e.runWebGLProgram(m,u,u[0].dtype)},HR={kernelName:kn,backendName:"webgl",kernelFunc:XY};var Sh=class{constructor(e){this.variableNames=["source"],this.outputShape=e,this.rank=e.length;let t=_e(this.rank);this.customUniforms=[{name:"start",arrayIndex:this.rank,type:"int"}];let o=YY(this.rank),n,s=e.map((a,i)=>`sourceLoc.${Aw[i]} = start[${i}] + coords.${Aw[i]};`);n=`
${t} sourceLoc;
${t} coords = getOutputCoords();
${s.join(`
`)}
`,this.userCode=`
void main() {
${n}
setOutput(getSource(${o}));
}
`}},/* Coordinate component names used to address up to 6 dimensions. */Aw=["x","y","z","w","u","v"];/**
 * Builds the argument list for `getSource(...)` in the unpacked slice shader:
 * "sourceLoc" for rank 1, or "sourceLoc.x,sourceLoc.y,..." for ranks up to 6.
 * Throws for any other rank.
 */function YY(r){if(r===1)return"sourceLoc";if(r<=6)return Aw.slice(0,r).map(e=>"sourceLoc."+e).join(",");throw Error(`Slicing for rank ${r} is not yet supported`)}/**
 * Packed slice shader program (packedInputs and packedOutput are both set).
 *
 * `e` is the output shape. The `start` offsets arrive as a custom int-array
 * uniform of length `rank`. The shader fills the four lanes of the result by
 * stepping the last (and, for rank > 1, second-to-last) coordinate and
 * bounds-checking each step against the output shape. For rank <= 4 the
 * source location is computed with a single vector add; above that it is
 * assigned per component.
 */var vh=class{constructor(e){this.variableNames=["source"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=e,this.rank=e.length,this.customUniforms=[{name:"start",arrayIndex:this.rank,type:"int"}];let t=_e(this.rank),o=$t("coords",this.rank),n=$t("sourceLoc",this.rank),s=this.rank===1?"sourceLoc":`vec2(${n.slice(-2).join()})`,a=`getChannel(getSource(${n.join()}), ${s})`,i=`
result.x = ${a};
if (++${o[this.rank-1]} < ${e[this.rank-1]}) {
++${n[this.rank-1]};
result.y = ${a};
--${n[this.rank-1]};
}
`,p=this.rank===1?"":`
--${o[this.rank-1]};
if (++${o[this.rank-2]} < ${e[this.rank-2]}) {
++${n[this.rank-2]};
result.z = ${a};
if (++${o[this.rank-1]} < ${e[this.rank-1]}) {
++${n[this.rank-1]};
result.w = ${a};
}
}
`,u=this.rank<=4?`sourceLoc = coords +
${t}(${e.map((c,l)=>`start[${l}]`).join()});`:e.map((c,l)=>`${n[l]} = ${o[l]} + start[${l}];`).join(`
`);this.userCode=`
void main() {
${t} coords = getOutputCoords();
${t} sourceLoc;
${u}
vec4 result = vec4(0.);
${i}
${p}
setOutput(result);
}
`}};function QY(r,e,t,o){let n=o.texData.get(r.dataId),s=o.makeTensorInfo(t,r.dtype),a=o.texData.get(s.dataId);Object.assign(a,n),a.refCount=1,a.shape=t,a.dtype=r.dtype;let i=et.computeFlatOffset(e,x.computeStrides(r.shape));n.slice&&(i+=n.slice.flatOffset),a.slice={flatOffset:i,origDataId:n.slice&&n.slice.origDataId||r.dataId};let p=o.dataRefCount.get(a.slice.origDataId)||1;return o.dataRefCount.set(a.slice.origDataId,p+1),s}function ps(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,size:a}=o,[i,p]=et.parseSliceParams(n,s,a);if(et.assertParamsValid(n,i,p),x.sizeFromShape(p)===0)return t.makeTensorInfo(p,n.dtype,[]);if(t.shouldExecuteOnCPU([n])||n.dtype==="string"){let l=t.texData.get(n.dataId),m=O$(l.values,i,p,n.shape,n.dtype);return t.makeTensorInfo(p,n.dtype,m)}let{isPacked:u}=t.texData.get(n.dataId),c=et.isSliceContinous(n.shape,i,p);if(u||!c){let l=P().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new vh(p):new Sh(p),m=[i];return t.runWebGLProgram(l,[n],n.dtype,m)}return t.uploadToGPU(n.dataId),QY(n,i,p,t)}var qR={kernelName:qn,backendName:"webgl",kernelFunc:ps};var ZY=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockShape:s,crops:a}=o;x.assert(n.shape.length<=4,()=>"batchToSpaceND for rank > 4 with a WebGL backend not implemented yet");let i=s.reduce((b,C)=>b*C),p=I.getReshaped(n.shape,s,i),u=I.getPermuted(p.length,s.length),c=I.getReshapedPermuted(n.shape,s,i),l=I.getSliceBeginCoords(a,s.length),m=I.getSliceSize(c,a,s.length),f=[],d=J({inputs:{x:n},backend:t,attrs:{shape:p}}),h=xt({inputs:{x:d},backend:t,attrs:{perm:u}}),g=J({inputs:{x:h},backend:t,attrs:{shape:c}}),y=ps({inputs:{x:g},backend:t,attrs:{begin:l,size:m}});return f.push(d),f.push(h),f.push(g),f.forEach(b=>t.disposeIntermediateTensorInfo(b)),y},KR={kernelName:hs,backendName:"webgl",kernelFunc:ZY};function JY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,weights:s}=e,{size:a}=o,i=t.readSync(n.dataId),p=t.readSync(s.dataId),u=oh(i,p,s.dtype,s.shape,a);return t.makeTensorInfo([a],s.dtype,u)}var 
jR={kernelName:up,backendName:"webgl",kernelFunc:JY};function eQ(r){let{inputs:e,backend:t}=r,{s0:o,s1:n}=e,s=t.readSync(o.dataId),a=t.readSync(n.dataId),i=I.assertAndGetBroadcastShape(Array.from(s),Array.from(a));return t.makeTensorInfo([i.length],"int32",Int32Array.from(i))}var XR={kernelName:pp,backendName:"webgl",kernelFunc:eQ};var tQ="return float(a != b);",Fw=ot({opSnippet:tQ,cpuKernelImpl:N$,dtype:"bool"}),YR={kernelName:go,backendName:"webgl",kernelFunc:Fw};function La(r){let{inputs:e,backend:t}=r,{input:o}=e,n=t.texData.get(o.dataId);return Rt({inputs:{x:n.complexTensorInfos.real},backend:t})}var QR={kernelName:la,backendName:"webgl",kernelFunc:La};var rQ="return float(int(x));";function ZR(r,e){let t=new fr(r.shape,rQ),o=e.runWebGLProgram(t,[r],"int32");return{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}function Dw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{dtype:s}=o;if(s==="complex64"){if(n.dtype==="complex64")return Rt({inputs:{x:n},backend:t});let a=Wr(n.shape),i=Dw({inputs:{x:n},backend:t,attrs:{dtype:"float32"}}),p=Ar({inputs:{real:i,imag:a},backend:t});return a.dispose(),t.disposeIntermediateTensorInfo(i),p}if(n.dtype==="complex64"){let a=La({inputs:{input:n},backend:t}),i=Dw({inputs:{x:a},backend:t,attrs:{dtype:s}});return t.disposeIntermediateTensorInfo(a),i}if(!x.hasEncodingLoss(n.dtype,s)){let a=Rt({inputs:{x:n},backend:t});return{dataId:a.dataId,shape:a.shape,dtype:s}}if(t.shouldExecuteOnCPU([n])){let a=t.texData.get(n.dataId).values,[i,p,u]=i$(a,n.shape,n.dtype,s);return t.makeTensorInfo(i,p,u)}if(s==="int32")return ZR(n,t);if(s==="bool"){let a=t.makeTensorInfo([],"bool",x.getTypedArrayFromDType("bool",1)),p=Fw({inputs:{a:n,b:a},backend:t});return t.disposeIntermediateTensorInfo(a),p}throw new Error(`Error in Cast: failed to cast ${n.dtype} to ${s}`)}var JR={kernelName:to,backendName:"webgl",kernelFunc:Dw};var eA="return 
ceil(x);",oQ=he({opSnippet:eA,packedOpSnippet:eA,cpuKernelImpl:u$}),tA={kernelName:ro,backendName:"webgl",kernelFunc:oQ};var kh=class{constructor(e){this.variableNames=["A"],this.customUniforms=[{name:"minVal",type:"
void main() {
float value = getAAtOutCoords();
if (isnan(value)) {
setOutput(value);
return;
}
setOutput(clamp(value, minVal, maxVal));
}
`}};var Th=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"minVal",type:"float"},{name:"maxVal",type:"float"}],this.outputShape=e,this.userCode=`
void main() {
vec4 value = getAAtOutCoords();
if (any(isnan(value))) {
setOutput(value);
return;
}
setOutput(clamp(value, vec4(minVal), vec4(maxVal)));
}
`}};function nQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{clipValueMin:s,clipValueMax:a}=o,i;P().getBool("WEBGL_PACK_CLIP")?i=new Th(n.shape):i=new kh(n.shape);let p=[[s],[a]];return t.runWebGLProgram(i,[n],n.dtype,p)}var rA={kernelName:Ro,backendName:"webgl",kernelFunc:nQ};var Nh=class{constructor(e){this.variableNames=["real","imag"],this.outputShape=e,this.userCode=`
void main() {
float re = abs(getRealAtOutCoords());
float im = abs(getImagAtOutCoords());
float mx = max(re, im);
// sadly the length function in glsl is not underflow-safe
// (at least not on Intel GPUs). So the safe solution is
// to ensure underflow-safety in all cases.
setOutput(
mx == 0.0 ? 0.0 : mx * length(vec2(1, min(re, im)/mx))
);
}
`}};function oA(r,e){return{dataId:e.dataId,dtype:e.dtype,shape:r.shape}}function sQ(r){let{inputs:e,backend:t}=r,{x:o}=e,n=t.texData.get(o.dataId),s=new Nh(o.shape),a=[oA(o,n.complexTensorInfos.real),oA(o,n.complexTensorInfos.imag)];return t.runWebGLProgram(s,a,a[0].dtype)}var nA={kernelName:cp,backendName:"webgl",kernelFunc:sQ};var _h=class{constructor(e){this.outputShape=[],this.outputShape=I.computeOutShape(e,1),this.variableNames=e.map((a,i)=>`T${i}`);let t=new Array(e.length-1);t[0]=e[0][1];for(let a=1;a<t.length;a++)t[a]=t[a-1]+e[a][1];let o=[`if (yC < ${t[0]}) setOutput(getT0(yR, yC));`];for(let a=1;a<t.length;a++){let i=t[a-1];o.push(`else if (yC < ${t[a]}) setOutput(getT${a}(yR, yC-${i}));`)}let n=t.length,s=t[t.length-1];o.push(`else setOutput(getT${n}(yR, yC-${s}));`),this.userCode=`
void main() {
ivec2 coords = getOutputCoords();
int yR = coords.x;
int yC = coords.y;
${o.join(`
`)}
}
`}};/**
 * Packed concat shader program along an arbitrary axis (packedInputs and
 * packedOutput are both set).
 *
 * Constructor arguments, as used below:
 *  - e: array of input shapes; inputs are named T0..Tn.
 *  - t: concat axis; the output shape is I.computeOutShape(e, t).
 *
 * A generated GLSL helper `getValue(...)` selects the source tensor by
 * comparing the coordinate on axis `t` against the running offsets of the
 * inputs, subtracting the offset before sampling (see `Eh`). `main` then
 * fills the four lanes of the output texel, bounds-checking the last two
 * coordinates against the output shape.
 */var $h=class{constructor(e,t){this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[],this.outputShape=I.computeOutShape(e,t);let o=this.outputShape,n=o.length,s=_e(n),a=$t("coords",n),i=["x","y","z","w","u","v"].slice(0,n);this.variableNames=e.map((h,g)=>`T${g}`);let p=new Array(e.length-1);p[0]=e[0][t];for(let h=1;h<p.length;h++)p[h]=p[h-1]+e[h][t];let u=i[t],c=i.slice(-2),l=i.join(),m=`if (${u} < ${p[0]}) {
return getChannel(
getT0(${l}), vec2(${c.join()}));
}`;for(let h=1;h<p.length;h++){let g=p[h-1];m+=`
if (${u} < ${p[h]} && ${u} >= ${p[h-1]}) {
return getChannel(
getT${h}(${Eh(i,u,g)}),
vec2(${Eh(c,u,g)}));
}`}let f=p.length,d=p[p.length-1];m+=`
return getChannel(
getT${f}(${Eh(i,u,d)}),
vec2(${Eh(c,u,d)}));`,this.userCode=`
float getValue(${i.map(h=>"int "+h)}) {
${m}
}
void main() {
${s} coords = getOutputCoords();
vec4 result = vec4(getValue(${a}), 0., 0., 0.);
${a[n-1]} = ${a[n-1]} + 1;
if (${a[n-1]} < ${o[n-1]}) {
result.g = getValue(${a});
}
${a[n-2]} = ${a[n-2]} + 1;
if (${a[n-2]} < ${o[n-2]}) {
result.a = getValue(${a});
}
${a[n-1]} = ${a[n-1]} - 1;
if (${a[n-2]} < ${o[n-2]} &&
${a[n-1]} < ${o[n-1]}) {
result.b = getValue(${a});
}
setOutput(result);
}
`}};function Eh(r,e,t){let o=r.indexOf(e);return r.map((s,a)=>a===o?`${s} - ${t}`:s).join()}function Lu(r){let{inputs:e,backend:t}=r,{input:o}=e,n=t.texData.get(o.dataId);return Rt({inputs:{x:n.complexTensorInfos.imag},backend:t})}var sA={kernelName:Ya,backendName:"webgl",kernelFunc:Lu};function yc(r,e,t){let o=r[0].dtype;if(o==="complex64"){let l=r.map(g=>La({inputs:{input:g},backend:t})),m=r.map(g=>Lu({inputs:{input:g},backend:t})),f=yc(l,e,t),d=yc(m,e,t),h=Ar({inputs:{real:f,imag:d},backend:t});return l.forEach(g=>t.disposeIntermediateTensorInfo(g)),m.forEach(g=>t.disposeIntermediateTensorInfo(g)),t.disposeIntermediateTensorInfo(f),t.disposeIntermediateTensorInfo(d),h}let n=t.shouldExecuteOnCPU(r);if(o==="string"&&(n=!0),n){let l=r.map(b=>{let w=[-1,x.sizeFromShape(b.shape.slice(e))];return J({inputs:{x:b},backend:t,attrs:{shape:w}})}),m=l.map(b=>({vals:t.readSync(b.dataId),shape:b.shape})),f=I.computeOutShape(l.map(b=>b.shape),1),d=l[0].shape[0]===1,h=p$(m,f,o,d),g=I.computeOutShape(r.map(b=>b.shape),e),y=t.makeTensorInfo(g,o,h);return l.forEach(b=>t.disposeIntermediateTensorInfo(b)),y}let s=P().getNumber("WEBGL_MAX_TEXTURES_IN_SHADER");if(r.length>s){let l=[];for(let f=0;f<r.length;f+=s){let d=r.slice(f,f+s);l.push(yc(d,e,t))}let m=yc(l,e,t);for(let f of l)t.disposeIntermediateTensorInfo(f);return m}if(P().getBool("WEBGL_PACK_ARRAY_OPERATIONS")&&r[0].shape.length>1){let l=new $h(r.map(m=>m.shape),e);return t.runWebGLProgram(l,r,o)}let{tensors2D:a,outShape:i}=aQ(r,e,t),p=new _h(a.map(l=>l.shape)),u=t.runWebGLProgram(p,a,o);a.forEach(l=>t.disposeIntermediateTensorInfo(l));let c=J({inputs:{x:u},attrs:{shape:i},backend:t});return t.disposeIntermediateTensorInfo(u),c}function aQ(r,e,t){let o=I.computeOutShape(r.map(s=>s.shape),e);return{tensors2D:r.map(s=>J({inputs:{x:s},attrs:{shape:[-1,x.sizeFromShape(s.shape.slice(e))]},backend:t})),outShape:o}}function 
Pw(r){
// Kernel function registered in aA below. Parses attrs.axis against the
// first input shape, validates the shapes with assertParamsConsistent and
// computes the output shape. An output of size zero yields an empty tensor
// info. Inputs of size zero are dropped; a single remaining input goes to
// Rt, otherwise the remaining inputs are concatenated by yc.
let{inputs:e,backend:t,attrs:o}=r,{axis:n}=o,s=x.parseAxisParam(n,e[0].shape)[0],a=e.map(u=>u.shape);I.assertParamsConsistent(a,s);let i=I.computeOutShape(e.map(u=>u.shape),s);if(x.sizeFromShape(i)===0)return t.makeTensorInfo(i,e[0].dtype,[]);let p=e.filter(u=>x.sizeFromShape(u.shape)>0);return p.length===1?Rt({inputs:{x:p[0]},backend:t}):yc(p,s,t)}
// Kernel config binding Pw to kernel name gs on the webgl backend.
var aA={kernelName:gs,backendName:"webgl",kernelFunc:Pw};
/**
 * Shader program description for an unpacked 2D convolution of x with W.
 * The generated GLSL loops over the filter window and accumulates a dot
 * product over input channels, four channels at a time.
 */
var bc=class{
/**
 * @param e conv info object; reads outShape, padInfo, strides, dilations,
 *     filter size, inChannels, dataFormat, inHeight and inWidth.
 * @param t when true, a bias input is added and summed into the result.
 * @param o activation snippet (GLSL source) or null for no activation.
 * @param n when true, the activation reads preluActivationWeights.
 * @param s when true, the activation reads leakyreluAlpha.
 */
constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.outputShape=e.outShape;let a=e.padInfo.top,i=e.padInfo.left,p=e.strideHeight,u=e.strideWidth,c=e.dilationHeight,l=e.dilationWidth,m=e.filterHeight,f=e.filterWidth,
// d is inChannels rounded down to a multiple of 4 (vec4 loop bound);
// h is the 0..3 channel remainder handled by the trailing branches.
d=Math.floor(e.inChannels/4)*4,h=e.inChannels%4,
// g selects the channelsLast layout; y, b and C are the output coordinate
// indices of row, column and channel for that layout.
g=e.dataFormat==="channelsLast",y=g?1:2,b=g?2:3,C=g?3:1,w="",k="";o&&(n?w=`float activation(float a) {
float b = getPreluActivationWeightsAtOutCoords();
${o}
}`:s?w=`float activation(float a) {
float b = getLeakyreluAlphaAtOutCoords();
${o}
}`:w=`
float activation(float x) {
${o}
}
`,k="result = activation(result);");
// Optional inputs are appended in the fixed order bias, prelu weights,
// leakyrelu alpha.
let _=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
${w}
const ivec2 strides = ivec2(${p}, ${u});
const ivec2 pads = ivec2(${a}, ${i});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d2 = coords[${C}];
ivec2 xRCCorner =
ivec2(coords[${y}], coords[${b}]) * strides - pads;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${m}; wR++) {
int xR = xRCorner + wR * ${c};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${f}; wC++) {
int xC = xCCorner + wC * ${l};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
for (int d1 = 0; d1 < ${d}; d1 += 4) {
vec4 wValues = vec4(
getW(wR, wC, d1, d2),
getW(wR, wC, d1 + 1, d2),
getW(wR, wC, d1 + 2, d2),
getW(wR, wC, d1 + 3, d2)
);
if (${g}) {
vec4 xValues = vec4(
getX(batch, xR, xC, d1),
getX(batch, xR, xC, d1 + 1),
getX(batch, xR, xC, d1 + 2),
getX(batch, xR, xC, d1 + 3)
);
dotProd += dot(xValues, wValues);
} else {
vec4 xValues = vec4(
getX(batch, d1, xR, xC),
getX(batch, d1 + 1, xR, xC),
getX(batch, d1 + 2, xR, xC),
getX(batch, d1 + 3, xR, xC)
);
dotProd += dot(xValues, wValues);
}
}
if (${h===1}) {
if (${g}) {
dotProd +=
getX(batch, xR, xC, ${d}) *
getW(wR, wC, ${d}, d2);
} else {
dotProd +=
getX(batch, ${d}, xR, xC) *
getW(wR, wC, ${d}, d2);
}
} else if (${h===2}) {
vec2 wValues = vec2(
getW(wR, wC, ${d}, d2),
getW(wR, wC, ${d} + 1, d2)
);
if (${g}) {
vec2 xValues = vec2(
getX(batch, xR, xC, ${d}),
getX(batch, xR, xC, ${d} + 1)
);
dotProd += dot(xValues, wValues);
} else {
vec2 xValues = vec2(
getX(batch, ${d}, xR, xC),
getX(batch, ${d} + 1, xR, xC)
);
dotProd += dot(xValues, wValues);
}
} else if (${h===3}) {
vec3 wValues = vec3(
getW(wR, wC, ${d}, d2),
getW(wR, wC, ${d} + 1, d2),
getW(wR, wC, ${d} + 2, d2)
);
if (${g}) {
vec3 xValues = vec3(
getX(batch, xR, xC, ${d}),
getX(batch, xR, xC, ${d} + 1),
getX(batch, xR, xC, ${d} + 2)
);
dotProd += dot(xValues, wValues);
} else {
vec3 xValues = vec3(
getX(batch, ${d}, xR, xC),
getX(batch, ${d} + 1, xR, xC),
getX(batch, ${d} + 2, xR, xC)
);
dotProd += dot(xValues, wValues);
}
}
}
}
float result = dotProd;
${_}
${k}
setOutput(result);
}
`}},
/**
 * Shader program description for an unpacked 3D convolution of x with W.
 * The constructor takes a conv info object e and reads its outShape,
 * padInfo (front, top, left), strides, dilations, filter sizes, inChannels,
 * inDepth, inHeight and inWidth. The generated GLSL accumulates a dot
 * product over the filter volume, four input channels at a time.
 */
Rh=class{constructor(e){this.variableNames=["x","W"],this.outputShape=e.outShape;let t=e.padInfo.front,o=e.padInfo.top,n=e.padInfo.left,s=e.strideDepth,a=e.strideHeight,i=e.strideWidth,p=e.dilationDepth,u=e.dilationHeight,c=e.dilationWidth,l=e.filterDepth,m=e.filterHeight,f=e.filterWidth,
// d is inChannels rounded down to a multiple of 4 (vec4 loop bound);
// h is the 0..3 channel remainder handled by the trailing branches.
d=Math.floor(e.inChannels/4)*4,h=e.inChannels%4;this.userCode=`
const ivec3 strides = ivec3(${s}, ${a}, ${i});
const ivec3 pads = ivec3(${t}, ${o}, ${n});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int d2 = coords.u;
ivec3 xFRCCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xFCorner = xFRCCorner.x;
int xRCorner = xFRCCorner.y;
int xCCorner = xFRCCorner.z;
// Convolve x(?, ?, ?, d1) with w(:, :, :, d1, d2) to get
// y(yF, yR, yC, d2). ? = to be determined. : = across all
// values in that axis.
float dotProd = 0.0;
for (int wF = 0; wF < ${l}; wF++) {
int xF = xFCorner + wF * ${p};
if (xF < 0 || xF >= ${e.inDepth}) {
continue;
}
for (int wR = 0; wR < ${m}; wR++) {
int xR = xRCorner + wR * ${u};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${f}; wC++) {
int xC = xCCorner + wC * ${c};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
for (int d1 = 0; d1 < ${d}; d1 += 4) {
vec4 xValues = vec4(
getX(batch, xF, xR, xC, d1),
getX(batch, xF, xR, xC, d1 + 1),
getX(batch, xF, xR, xC, d1 + 2),
getX(batch, xF, xR, xC, d1 + 3)
);
vec4 wValues = vec4(
getW(wF, wR, wC, d1, d2),
getW(wF, wR, wC, d1 + 1, d2),
getW(wF, wR, wC, d1 + 2, d2),
getW(wF, wR, wC, d1 + 3, d2)
);
dotProd += dot(xValues, wValues);
}
if (${h===1}) {
dotProd +=
getX(batch, xF, xR, xC, ${d}) *
getW(wF, wR, wC, ${d}, d2);
} else if (${h===2}) {
vec2 xValues = vec2(
getX(batch, xF, xR, xC, ${d}),
getX(batch, xF, xR, xC, ${d} + 1)
);
vec2 wValues = vec2(
getW(wF, wR, wC, ${d}, d2),
getW(wF, wR, wC, ${d} + 1, d2)
);
dotProd += dot(xValues, wValues);
} else if (${h===3}) {
vec3 xValues = vec3(
getX(batch, xF, xR, xC, ${d}),
getX(batch, xF, xR, xC, ${d} + 1),
getX(batch, xF, xR, xC, ${d} + 2)
);
vec3 wValues = vec3(
getW(wF, wR, wC, ${d}, d2),
getW(wF, wR, wC, ${d} + 1, d2),
getW(wF, wR, wC, ${d} + 2, d2)
);
dotProd += dot(xValues, wValues);
}
}
}
}
setOutput(dotProd);
}
`}};
/**
 * Shader program description for a packed 2D convolution of x with W
 * (packedInputs and packedOutput are both set). Pads, strides, dilations
 * and input dims are supplied at run time through customUniforms; padding
 * left, stride width, dilation width and filter size are baked into the
 * generated source.
 *
 * The constructor assembles the loop body string m by unrolling the filter
 * columns two at a time (y and y+1), choosing a different texel fetch
 * sequence depending on whether the stride width is 1 and whether the left
 * padding is odd. The statement order in that assembly is significant.
 */
var Cc=class{
/**
 * @param e conv info object; reads outShape, padInfo.left, strideWidth,
 *     dilationWidth, filterHeight, filterWidth and inChannels.
 * @param t when true, a bias input is added and summed into the result.
 * @param o activation snippet (GLSL source) or null for no activation.
 * @param n when true, the activation reads preluActivationWeights.
 * @param s when true, the activation reads leakyreluAlpha.
 */
constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=e.outShape,this.enableShapeUniforms=lt(this.outputShape.length);let a=e.padInfo.left,i=e.strideWidth,p=e.dilationWidth,u=e.filterHeight,c=e.filterWidth,l=c,m=`
int xR; int xC; int xCOffset;
vec4 wTexel; vec4 previous; vec4 final;`;for(let g=0;g<c;g++)m+=`
vec4 xTexelC${g*2};
int xTexelC${g*2}Ready;
vec4 xTexelC${g*2+1};
int xTexelC${g*2+1}Ready;
vec4 xC${g};`;m+=`
for (int r = 0; r < ${u}; r++) {
for (int d1 = 0; d1 < ${e.inChannels}; d1 += 2) {
`;for(let g=0;g<c;g++)m+=`
xTexelC${g*2} = vec4(0.0);
xTexelC${g*2}Ready = 0;
xTexelC${g*2+1} = vec4(0.0);
xTexelC${g*2+1}Ready = 0;
xC${g} = vec4(0.0);`;m+=`
xR = xRCorner + r * dilations[0];
if (xR >=0 && xR < inDims[0]) {
`;for(let g=0;g<(l+1)/2;g++){let y=g*2;if(m+=`
xC = xCCorner + ${y*p};
`,i===1){if(y<c&&(a%2===1?(m+=`
xCOffset = xC + 1;
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${y}Ready == 0) {
xTexelC${y} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${y}.zw = vec2(0.0);
}
xTexelC${y}Ready = 1;
}
`,p===1&&y>0?m+=`
xC${y} = vec4(xTexelC${y-2}.zw, xTexelC${y}.xy);
`:m+=`
xCOffset = xC + 1 - 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
previous.zw = vec2(0.0);
}
xC${y} = vec4(previous.zw, xTexelC${y}.xy);
} else {
xC${y} = vec4(0.0, 0.0, xTexelC${y}.xy);
}
`):m+=`
if (xC >= 0 && xC < inDims[1] && xTexelC${y}Ready == 0) {
xTexelC${y} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${y}.zw = vec2(0.0);
}
xTexelC${y}Ready = 1;
}
xC${y} = xTexelC${y};
`,y+1<c)){let b=a%2===0?x.nearestLargerEven(p):p;p%2===0&&a%2===1||p%2!==0&&a%2!==1?(m+=`
xCOffset = xC + imod(pads[1], 2) + ${b};
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${y+1}Ready == 0) {
xTexelC${y+1} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${y+1}.zw = vec2(0.0);
}
xTexelC${y+1}Ready = 1;
}
`,p>1?m+=`
xCOffset -= 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
xC${y+1} = vec4(previous.zw, xTexelC${y+1}.xy);
} else {
xC${y+1} = vec4(0.0, 0.0, xTexelC${y+1}.xy);
}
`:m+=`
xC${y+1} = vec4(xTexelC${y}.zw, xTexelC${y+1}.xy);
`):b===1?m+=`
xC${y+1} = xTexelC${y};
`:m+=`
xCOffset = xC + ${b};
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${y+1}Ready == 0) {
xTexelC${y+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${y+1}.zw = vec2(0.0);
}
xTexelC${y+1}Ready = 1;
}
xC${y+1} = xTexelC${y+1};
`}}else y<c&&(a%2===1?(m+=`
xCOffset = xC + 1 - strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${y}Ready == 0) {
xTexelC${y} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${y}.zw = vec2(0.0);
}
xTexelC${y}Ready = 1;
}
if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${y+1}Ready == 0) {
xTexelC${y+1} = getX(batch, xR, xC + 1, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xC + 2 >= inDims[1]) {
xTexelC${y+1}.zw = vec2(0.0);
}
xTexelC${y+1}Ready = 1;
}
xC${y} = vec4(xTexelC${y}.zw, xTexelC${y+1}.zw);
`,y+1<c&&(m+=`
final = vec4(0.0);
xCOffset = xC + 1 + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1]) {
final = getX(batch, xR, xCOffset, d1);
}
xC${y+1} = vec4(xTexelC${y+1}.xy, final.xy);
`)):(m+=`
if(xC >= 0 && xC < inDims[1] && xTexelC${y}Ready == 0) {
xTexelC${y} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${y}.zw = vec2(0.0);
}
xTexelC${y}Ready = 1;
}
xCOffset = xC + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${y+1}Ready == 0) {
xTexelC${y+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${y+1}.zw = vec2(0.);
}
xTexelC${y+1}Ready = 1;
}
xC${y} = vec4(
xTexelC${y}.xy, xTexelC${y+1}.xy);
`,y+1<c&&(m+=`
xC${y+1} = vec4(xTexelC${y}.zw, xTexelC${y+1}.zw);
`)));y<c&&(m+=`
wTexel = getW(r, ${y}, d1, d2);
dotProd += xC${y}.xxzz * vec4(wTexel.xy, wTexel.xy);
if(d1 + 1 < ${e.inChannels}) {
dotProd += xC${y}.yyww * vec4(wTexel.zw, wTexel.zw);
}
`,y+1<c&&(m+=`
wTexel = getW(r, ${y+1}, d1, d2);
dotProd += xC${y+1}.xxzz * vec4(wTexel.xy, wTexel.xy);
if(d1 + 1 < ${e.inChannels}) {
dotProd += xC${y+1}.yyww * vec4(wTexel.zw, wTexel.zw);
}
`))}m+=`
}
`,m+=`
}
`,m+=`
}
`;let f="",d="";o&&(n?f=`vec4 activation(vec4 a) {
vec4 b = getPreluActivationWeightsAtOutCoords();
${o}
}`:s?f=`vec4 activation(vec4 a) {
vec4 b = getLeakyreluAlphaAtOutCoords();
${o}
}`:f=`vec4 activation(vec4 x) {
${o}
}`,d="result = activation(result);");
// Optional inputs are appended in the fixed order bias, prelu weights,
// leakyrelu alpha.
let h=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
${f}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
//intialize dotProd with a small epsilon seems to reduce GPU accuracy loss.
vec4 dotProd = vec4(0.000000000000001);
${m}
vec4 result = dotProd - vec4(0.000000000000001);
${h}
${d}
setOutput(result);
}
`}};
/**
 * Packed shader program description used by Ph as the first stage of a
 * convolution: it gathers values of input A into a rank 3 output (the
 * shader reads getOutputCoords as an ivec3).
 * The shader body is unrolled over the four components of the output texel
 * (c and l each take the values 0 and 1, writing result[c*2+l]).
 * NOTE(review): this looks like the im2col program selected by the
 * WEBGL_CONV_IM2COL flag - confirm against the caller.
 */
var Ah=class{
/**
 * @param e output shape; e[1] and e[2] bound pos and blockIndex when shape
 *     uniforms are disabled.
 * @param t object providing dataFormat (channelsLast selects which input
 *     axes are treated as rows and columns).
 */
constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"inputShape",type:"ivec4"},{name:"pad",type:"ivec2"},{name:"stride",type:"ivec2"},{name:"dilation",type:"ivec2"},{name:"inChannels",type:"int"},{name:"itemsPerBlockRow",type:"int"},{name:"outWidth",type:"int"}],this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let{dataFormat:o}=t,n=Ct(),s=o==="channelsLast",
// a and i are the input axes holding rows and columns for the layout.
a=s?1:2,i=s?2:3,
// Bounds check uses the outShape uniform when shape uniforms are enabled,
// otherwise the literal sizes from e.
p=this.enableShapeUniforms?"if(blockIndex < outShape[2] && pos < outShape[1]) {":`if(blockIndex < ${e[2]} && pos < ${e[1]}) {`,u="";for(let c=0;c<=1;c++)for(let l=0;l<=1;l++)u+=`
blockIndex = rc.z + ${l};
pos = rc.y + ${c};
${p}
offsetY = int(blockIndex / outWidth) * stride[0] - pad[0];
d0 = offsetY + dilation[0] * (pos / itemsPerBlockRow);
if(d0 < inputShape[${a}] && d0 >= 0) {
// Use custom imod instead mod. On Intel GPU, mod may generate
// unexpected value.
// https://github.com/tensorflow/tfjs/issues/5447
offsetX = imod(blockIndex, outWidth) * stride[1] - pad[1];
d1 = offsetX + dilation[1] * (imod(pos, itemsPerBlockRow) /
inChannels);
if(d1 < inputShape[${i}] && d1 >= 0) {
ch = imod(pos, inChannels);
if (${s}) {
innerDims = vec2(d1, ch);
result[${c*2+l}] = getChannel(
getA(rc.x, d0, int(innerDims.x),
int(innerDims.y)), innerDims);
} else {
innerDims = vec2(d0, d1);
result[${c*2+l}] = getChannel(
getA(rc.x, ch, int(innerDims.x),
int(innerDims.y)), innerDims);
}
}
}
}
`;this.userCode=`
void main() {
ivec3 rc = getOutputCoords();
vec4 result = vec4(0);
int blockIndex, pos, offsetY, d0, offsetX, d1, ch;
vec2 innerDims;
${u}
${n.output} = result;
}
`}};
/**
 * Computes a replacement shape for a bias or prelu weights tensor, or null
 * when no reshape is needed.
 *  - rank 3 or more, e true: the third and second to last dims are merged.
 *  - rank 3 or more, e false: the last two dims are merged.
 *  - rank 1 with more than one element and e false: [r[0], 1].
 *  - anything else: null.
 * Callers in this chunk pass the channelsLast flag as e.
 */
function Fh(r,e){let t=r.length;return t>=3?e?[...r.slice(0,-3),r[t-3]*r[t-2],r[t-1]]:[...r.slice(0,-3),r[t-3],r[t-2]*r[t-1]]:!e&&t===1&&r[0]>1?[r[0],1]:null}
/**
 * Computes a convolution as a single batched matrix multiplication (Mu) of
 * the input against the filter reshaped to [1, inChannels, outChannels].
 * The only caller visible in this chunk uses it for 1x1 filters with unit
 * stride and dilation.
 *
 * Takes one options object: x, filter, convInfo, backend, and optional
 * bias, preluActivationWeights, leakyreluAlpha (default 0) and activation.
 * Returns the result tensor info with shape convInfo.outShape. Every
 * intermediate pushed onto y is disposed before returning.
 */
function Dh({x:r,filter:e,convInfo:t,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=r.shape,u=o.texData.get(r.dataId),c=t.inChannels,l=p[0]*p[1]*p[2],m=t.outChannels,f=t.dataFormat==="channelsLast",d=!1,h=!1,g,y=[];
// Reshape prelu weights and bias when Fh asks for it; the reshaped copies
// are tracked in y for disposal.
if(s!=null){let w=Fh(s.shape,f);w!=null&&(s=J({inputs:{x:s},backend:o,attrs:{shape:w}}),y.push(s))}if(n!=null){let w=Fh(n.shape,f);w!=null&&(n=J({inputs:{x:n},backend:o,attrs:{shape:w}}),y.push(n))}
// Fast path: the input texture is already packed, channelsLast, and has an
// odd p[2]. The texture data shape is temporarily bumped by one in its
// second to last dim so that the alias k can be multiplied without a real
// reshape, then restored after the multiplication.
// NOTE(review): Rw is a threshold defined outside this chunk.
if(!((l===1||m===1)&&c>Rw)&&u.isPacked&&f&&u.texture!=null&&p[2]%2!==0&&x.arraysEqual(u.shape.slice(-3),p.slice(-3))){let w=p[0]*p[1]*(p[2]+1),k={dataId:r.dataId,shape:[1,w,t.inChannels],dtype:r.dtype},_=u.shape;u.shape=u.shape.slice(),u.shape[u.shape.length-2]++,x.assert(Ti(u.shape,k.shape),()=>`packed reshape ${u.shape} to ${k.shape} isn't free`);let E=J({inputs:{x:e},backend:o,attrs:{shape:[1,t.inChannels,t.outChannels]}});y.push(E);let R=Mu({a:k,b:E,backend:o,transposeA:d,transposeB:h,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a}),A=o.texData.get(R.dataId);x.assert(A.isPacked,()=>"batchMatMul result is expected to be packed"),u.shape=_,A.shape=t.outShape,g=Rt({inputs:{x:R},backend:o}),g.shape=t.outShape,y.push(R)}
// General path: reshape input and filter to rank 3, multiply (operand
// order and transposeA depend on the data format), reshape to outShape.
else{let w=t.outHeight*t.outWidth,k=J({inputs:{x:r},backend:o,attrs:{shape:f?[t.batchSize,w,t.inChannels]:[t.batchSize,t.inChannels,w]}}),_=J({inputs:{x:e},backend:o,attrs:{shape:[1,t.inChannels,t.outChannels]}}),E=Mu({a:f?k:_,b:f?_:k,transposeA:!f,transposeB:h,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a});g=J({inputs:{x:E},backend:o,attrs:{shape:t.outShape}}),y.push(k),y.push(_),y.push(E)}for(let w of y)o.disposeIntermediateTensorInfo(w);return g}
function 
Ph({x:r,filter:e,convInfo:t,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){
// Computes a 2D convolution in two GPU passes: program Ah rearranges the
// input into a [batchSize, h, g] tensor, then program xc multiplies it
// with the filter reshaped to [1, h, filterSize / h]. Bias, prelu weights
// and a leakyrelu alpha scalar are passed to the second program as extra
// inputs. Returns the result reshaped to convInfo.outShape; everything
// pushed onto w is disposed before returning.
// h is filterWidth * filterHeight * inChannels, g is outHeight * outWidth.
let{filterWidth:p,filterHeight:u,inChannels:c,outWidth:l,outHeight:m,dataFormat:f}=t,d=f==="channelsLast",h=p*u*c,g=m*l,y=[t.batchSize,h,g],b=!0,C=!1,w=[];
// Reshape prelu weights and bias when Fh asks for it.
if(s!=null){let H=Fh(s.shape,d);H!=null&&(s=J({inputs:{x:s},backend:o,attrs:{shape:H}}),w.push(s))}if(n!=null){let H=Fh(n.shape,d);H!=null&&(n=J({inputs:{x:n},backend:o,attrs:{shape:H}}),w.push(n))}let k=J({inputs:{x:e},backend:o,attrs:{shape:[1,h,x.sizeFromShape(e.shape)/h]}});w.push(k);
// E holds the custom uniform values for Ah, in the order of its
// customUniforms list: inputShape, pad, stride, dilation, inChannels,
// itemsPerBlockRow, outWidth.
let _=new Ah(y,t),E=[r.shape,[t.padInfo.top,t.padInfo.left],[t.strideHeight,t.strideWidth],[t.dilationHeight,t.dilationWidth],[t.inChannels],[t.filterWidth*t.inChannels],[t.outWidth]],R=o.runWebGLProgram(_,[r],"float32",E),A=J({inputs:{x:R},backend:o,attrs:{shape:y}});w.push(R),w.push(A);
// Second pass: operand order depends on the data format.
let D=n!=null,O=s!=null,M=i==="leakyrelu",L=i?Ma(i,!0):null,W=new xc(d?A.shape:k.shape,d?k.shape:A.shape,d?[t.batchSize,g,t.outChannels]:[t.batchSize,t.outChannels,g],b,C,D,L,O,M),V=d?[A,k]:[k,A];if(n&&V.push(n),O&&V.push(s),M){let H=o.makeTensorInfo([],"float32",x.createScalarValue(a,"float32"));V.push(H),w.push(H)}let G=o.runWebGLProgram(W,V,"float32"),q=J({inputs:{x:G},backend:o,attrs:{shape:t.outShape}});w.push(G);for(let H of w)o.disposeIntermediateTensorInfo(H);return q}
// iQ: 2D convolution kernel function. The part on this line builds the conv
// info and picks Dh for 1x1 filters with unit stride and dilation and SAME
// or VALID padding; the remaining branches continue on the following line.
function iQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dataFormat:p,dilations:u,dimRoundingMode:c}=o,l=I.convertConv2DDataFormat(p),m=I.computeConv2DInfo(n.shape,s.shape,a,u,i,c,!1,l),f;if(m.filterHeight===1&&m.filterWidth===1&&m.dilationHeight===1&&m.dilationWidth===1&&m.strideHeight===1&&m.strideWidth===1&&(m.padInfo.type==="SAME"||m.padInfo.type==="VALID"))f=Dh({x:n,filter:s,convInfo:m,backend:t});else if(m.strideWidth<=2&&l==="channelsLast"&&P().getBool("WEBGL_EXP_CONV")){let h=new 
Cc(m),g=[[m.padInfo.top,m.padInfo.left],[m.strideHeight,m.strideWidth],[m.dilationHeight,m.dilationWidth],[m.inHeight,m.inWidth]];f=t.runWebGLProgram(h,[n,s],"float32",g)}else if(P().getBool("WEBGL_CONV_IM2COL"))f=Ph({x:n,filter:s,convInfo:m,backend:t});else{let h=new bc(m);f=t.runWebGLProgram(h,[n,s],"float32")}let d=J({inputs:{x:f},backend:t,attrs:{shape:m.outShape}});return t
void main() {
ivec4 coords = getOutputCoords();
int wR = coords.x;
int wC = coords.y;
int d1 = coords.z;
int d2 = coords.w;
// Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int b = 0; b < ${e.batchSize}; b++) {
for (int yR = 0; yR < ${e.outHeight}; yR++) {
int xR = wR + yR * ${t} - ${n};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int yC = 0; yC < ${e.outWidth}; yC++) {
int xC = wC + yC * ${o} - ${s};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
if (${a}) {
float dyValue = getDy(b, yR, yC, d2);
float xValue = getX(b, xR, xC, d1);
dotProd += (xValue * dyValue);
} else {
float dyValue = getDy(b, d2, yR, yC);
float xValue = getX(b, d1, xR, xC);
dotProd += (xValue * dyValue);
}
}
}
}
setOutput(dotProd);
}
`}},
/**
 * Shader program description computing the gradient of a 2D convolution
 * with respect to its input: inputs are dy and W, the output has shape
 * convInfo.inShape. The constructor takes a conv info object e and reads
 * inShape, filter size, strides, dataFormat, padInfo, outHeight, outWidth
 * and outChannels.
 */
Mh=class{constructor(e){this.variableNames=["dy","W"],this.outputShape=e.inShape;let t=e.filterHeight,o=e.filterWidth,n=e.strideHeight,s=e.strideWidth,a=e.dataFormat==="channelsLast",
// i and p are the pads of the transposed problem: filter size - 1 - pad.
i=t-1-e.padInfo.top,p=o-1-e.padInfo.left,
// u, c and l are the output coordinate indices of row, column and channel.
u=a?1:2,c=a?2:3,l=a?3:1;this.userCode=`
const ivec2 pads = ivec2(${i}, ${p});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d1 = coords[${l}];
ivec2 dyCorner = ivec2(coords[${u}], coords[${c}]) - pads;
int dyRCorner = dyCorner.x;
int dyCCorner = dyCorner.y;
// Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${t}; wR++) {
float dyR = float(dyRCorner + wR) / ${n}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
int wRPerm = ${t} - 1 - wR;
for (int wC = 0; wC < ${o}; wC++) {
float dyC = float(dyCCorner + wC) / ${s}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${o} - 1 - wC;
for (int d2 = 0; d2 < ${e.outChannels}; d2++) {
if (${a}) {
float xValue = getDy(batch, idyR, idyC, d2);
float wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
} else {
float xValue = getDy(batch, d2, idyR, idyC);
float wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
}
}
}
}
setOutput(dotProd);
}
`}},
/**
 * Shader program description computing the gradient of a 3D convolution
 * with respect to its filter: inputs are x and dy, the output has shape
 * convInfo.filterShape. The constructor takes a conv info object e and
 * reads filterShape, strides, padInfo (front, top, left), batchSize, the
 * out sizes and the in sizes. Each output element sums x * dy over batch
 * and all output positions whose input position is in bounds.
 */
Lh=class{constructor(e){this.variableNames=["x","dy"],this.outputShape=e.filterShape;let t=e.strideDepth,o=e.strideHeight,n=e.strideWidth,s=e.padInfo.front,a=e.padInfo.top,i=e.padInfo.left;this.userCode=`
void main() {
ivec5 coords = getOutputCoords();
int wF = coords.x;
int wR = coords.y;
int wC = coords.z;
int d1 = coords.w;
int d2 = coords.u;
float dotProd = 0.0;
for (int b = 0; b < ${e.batchSize}; b++) {
for (int yF = 0; yF < ${e.outDepth}; yF++) {
int xF = wF + yF * ${t} - ${s};
if (xF < 0 || xF >= ${e.inDepth}) {
continue;
}
for (int yR = 0; yR < ${e.outHeight}; yR++) {
int xR = wR + yR * ${o} - ${a};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int yC = 0; yC < ${e.outWidth}; yC++) {
int xC = wC + yC * ${n} - ${i};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float dyValue = getDy(b, yF, yR, yC, d2);
float xValue = getX(b, xF, xR, xC, d1);
dotProd += (xValue * dyValue);
}
}
}
}
setOutput(dotProd);
}
`}},
/**
 * Shader program description computing the gradient of a 3D convolution
 * with respect to its input: inputs are dy and W, the output has shape
 * convInfo.inShape. The constructor takes a conv info object e and reads
 * inShape, filter sizes, strides, padInfo (front, top, left), the out sizes
 * and outChannels.
 */
Bh=class{constructor(e){this.variableNames=["dy","W"],this.outputShape=e.inShape;let t=e.filterDepth,o=e.filterHeight,n=e.filterWidth,s=e.strideDepth,a=e.strideHeight,i=e.strideWidth,
// p, u and c are the pads of the transposed problem: filter size - 1 - pad.
p=t-1-e.padInfo.front,u=o-1-e.padInfo.top,c=n-1-e.padInfo.left;this.userCode=`
const ivec3 pads = ivec3(${p}, ${u}, ${c});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int d1 = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyFCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
float dotProd = 0.0;
for (int wF = 0; wF < ${t}; wF++) {
float dyF = float(dyFCorner + wF) / ${s}.0;
if (dyF < 0.0 || dyF >= ${e.outDepth}.0 || fract(dyF) > 0.0) {
continue;
}
int idyF = int(dyF);
int wFPerm = ${t} - 1 - wF;
for (int wR = 0; wR < ${o}; wR++) {
float dyR = float(dyRCorner + wR) / ${a}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 ||
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
int wRPerm = ${o} - 1 - wR;
for (int wC = 0; wC < ${n}; wC++) {
float dyC = float(dyCCorner + wC) / ${i}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${n} - 1 - wC;
for (int d2 = 0; d2 < ${e.outChannels}; d2++) {
float xValue = getDy(batch, idyF, idyR, idyC, d2);
float wValue = getW(wFPerm, wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
}
}
}
}
setOutput(dotProd);
}
`}};
/**
 * Kernel function (registered in uA): builds a 2D conv info from x.shape
 * and attrs.filterShape with dilation 1, then runs program Oh on [x, dy].
 * The result dtype is float32.
 */
function uQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,dy:s}=e,{strides:a,pad:i,dataFormat:p,dimRoundingMode:u,filterShape:c}=o,l=I.convertConv2DDataFormat(p),m=I.computeConv2DInfo(n.shape,c,a,1,i,u,!1,l),f=new Oh(m);return t.runWebGLProgram(f,[n,s],"float32")}
// Kernel config binding uQ to kernel name lp on the webgl backend.
var uA={kernelName:lp,backendName:"webgl",kernelFunc:uQ};
/**
 * Kernel function (registered in pA): builds a 2D conv info from
 * attrs.inputShape and filter.shape with dilation 1, then runs program Mh
 * on [dy, filter]. The result dtype is float32.
 */
function pQ(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,filter:s}=e,{inputShape:a,strides:i,pad:p,dataFormat:u,dimRoundingMode:c}=o,l=I.convertConv2DDataFormat(u),m=I.computeConv2DInfo(a,s.shape,i,1,p,c,!1,l),f=new Mh(m);return t.runWebGLProgram(f,[n,s],"float32")}
// Kernel config binding pQ to kernel name mn on the webgl backend.
var pA={kernelName:mn,backendName:"webgl",kernelFunc:pQ};
/**
 * Kernel function (registered in cA): builds a 3D conv info from x.shape
 * and filter.shape, then runs program Rh on [x, filter] as float32.
 */
function cQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dilations:p}=o,u=I.computeConv3DInfo(n.shape,s.shape,a,p,i),c=new Rh(u);return t.runWebGLProgram(c,[n,s],"float32")}
// Kernel config binding cQ to kernel name mp on the webgl backend.
var cA={kernelName:mp,backendName:"webgl",kernelFunc:cQ};
/**
 * Kernel function (registered in lA): builds a 3D conv info from x.shape
 * and attrs.filterShape with dilation 1, then runs program Lh on [x, dy].
 */
function lQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,dy:s}=e,{strides:a,pad:i,filterShape:p}=o,u=I.computeConv3DInfo(n.shape,p,a,1,i),c=new Lh(u);return t.runWebGLProgram(c,[n,s],"float32")}
// Kernel config binding lQ to kernel name Dm on the webgl backend.
var lA={kernelName:Dm,backendName:"webgl",kernelFunc:lQ};
/**
 * Kernel function (registered in mA): builds a 3D conv info from
 * attrs.inputShape and filter.shape with dilation 1, then runs program Bh
 * on [dy, filter].
 */
function mQ(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,filter:s}=e,{pad:a,strides:i,inputShape:p}=o,u=I.computeConv3DInfo(p,s.shape,i,1,a),c=new Bh(u);return t.runWebGLProgram(c,[n,s],"float32")}
// Kernel config binding mQ to kernel name fp on the webgl backend.
var mA={kernelName:fp,backendName:"webgl",kernelFunc:mQ};
// fQ: GLSL snippet returning cos(x), prefixed with the shared snippet jo
// (defined outside this chunk). dQ wraps it via he and fA registers it.
var fQ=jo+`
return cos(x);
`,dQ=he({opSnippet:fQ}),fA={kernelName:fn,backendName:"webgl",kernelFunc:dQ};
// hQ: GLSL snippet for the hyperbolic cosine. The shader variable e2x holds
// exp(-x), so the returned expression is (exp(-x) + exp(x)) / 2.
var hQ=`
float e2x = exp(-x);
return (e2x + 1.0 / e2x) / 2.0;
`,
// gQ wraps the hQ snippet via he; dA registers it under kernel name dn.
gQ=he({opSnippet:hQ}),dA={kernelName:dn,backendName:"webgl",kernelFunc:gQ};
/**
 * Shader program description that crops boxes out of a batch of images and
 * resizes each crop to a fixed size. Inputs are Image, Boxes and BoxInd;
 * the output shape is [numBoxes, cropHeight, cropWidth, depth].
 */
var Vh=class{
/**
 * @param e image shape, destructured as [batch, height, width, depth].
 * @param t boxes shape; only its first entry (number of boxes) is used.
 * @param o crop size, destructured as [cropHeight, cropWidth].
 * @param n resize method; bilinear selects interpolation, any other value
 *     selects the nearest neighbor branch.
 * @param s value written when the sampled coordinate falls outside the
 *     image.
 */
constructor(e,t,o,n,s){this.variableNames=["Image","Boxes","BoxInd"],this.outputShape=[];let[a,i,p,u]=e,[c]=t,[l,m]=o;this.outputShape=[c,l,m,u];let f=n==="bilinear"?1:0,
// d and h are the largest valid row and column indices as GLSL floats.
[d,h]=[`${i-1}.0`,`${p-1}.0`],
// For each axis: the ratio constant, the scale expression and the input
// coordinate expression. A crop size of 1 samples the box centre instead.
[g,y,b]=l>1?[`${(i-1)/(l-1)}`,"(y2-y1) * height_ratio",`y1*${d} + float(y)*(height_scale)`]:["0.0","0.0",`0.5 * (y1+y2) * ${d}`],[C,w,k]=m>1?[`${(p-1)/(m-1)}`,"(x2-x1) * width_ratio",`x1*${h} + float(x)*(width_scale)`]:["0.0","0.0",`0.5 * (x1+x2) * ${h}`];this.userCode=`
const float height_ratio = float(${g});
const float width_ratio = float(${C});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int y = coords[1];
int x = coords[2];
int d = coords[3];
// get box vals
float y1 = getBoxes(b,0);
float x1 = getBoxes(b,1);
float y2 = getBoxes(b,2);
float x2 = getBoxes(b,3);
// get image in batch index
int bInd = round(getBoxInd(b));
if(bInd < 0 || bInd >= ${a}) {
return;
}
float height_scale = ${y};
float width_scale = ${w};
float in_y = ${b};
if( in_y < 0.0 || in_y > ${d} ) {
setOutput(float(${s}));
return;
}
float in_x = ${k};
if( in_x < 0.0 || in_x > ${h} ) {
setOutput(float(${s}));
return;
}
vec2 sourceFracIndexCR = vec2(in_x,in_y);
if(${f} == 1) {
// Compute the four integer indices.
ivec2 sourceFloorCR = ivec2(sourceFracIndexCR);
ivec2 sourceCeilCR = ivec2(ceil(sourceFracIndexCR));
float topLeft = getImage(b, sourceFloorCR.y, sourceFloorCR.x, d);
float bottomLeft = getImage(b, sourceCeilCR.y, sourceFloorCR.x, d);
float topRight = getImage(b, sourceFloorCR.y, sourceCeilCR.x, d);
float bottomRight = getImage(b, sourceCeilCR.y, sourceCeilCR.x, d);
vec2 fracCR = sourceFracIndexCR - vec2(sourceFloorCR);
float top = topLeft + (topRight - topLeft) * fracCR.x;
float bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR.x;
float newValue = top + (bottom - top) * fracCR.y;
setOutput(newValue);
} else {
// Compute the coordinators of nearest neighbor point.
ivec2 sourceNearestCR = ivec2(floor(
sourceFracIndexCR + vec2(0.5,0.5)));
float newValue = getImage(b, sourceNearestCR.y, sourceNearestCR.x, d);
setOutput(newValue);
}
}
`}};
/**
 * Kernel function (registered in hA): builds program Vh from the image and
 * boxes shapes plus the cropSize, method and extrapolationValue attrs, and
 * runs it on [image, boxes, boxInd] with a float32 result.
 */
var xQ=r=>{let{inputs:e,backend:t,attrs:o}=r,{image:n,boxes:s,boxInd:a}=e,{cropSize:i,method:p,extrapolationValue:u}=o,c=new Vh(n.shape,s.shape,i,p,u);return t.runWebGLProgram(c,[n,s,a],"float32")},hA={kernelName:xn,backendName:"webgl",kernelFunc:xQ};
// Enum of cumulative operators. Each value is the GLSL operator that the
// Ml shader splices in front of the equals sign.
var Bu;(function(r){r.Prod="*",r.Sum="+"})(Bu||(Bu={}));
/**
 * Shader program description for one pass of a cumulative product or sum
 * along the last axis. The uniform named index selects the pass; the shader
 * combines each element with the one pow2 = 2^index positions away.
 */
var Ml=class{
/**
 * @param e operator, one of the Bu values.
 * @param t output shape; ranks 1 to 4 are supported by gA and xA.
 * @param o exclusive flag: when true the pass starts from the identity
 *     value (1.0 for Prod, 0.0 for Sum) and reads the neighbour one
 *     position away instead of pow2 positions away.
 * @param n reverse flag: when true neighbours are taken from higher
 *     indices instead of lower ones.
 */
constructor(e,t,o,n){this.op=e,this.outputShape=t,this.variableNames=["x"],this.customUniforms=[{name:"index",type:"float"}];let s=this.outputShape.length,a=this.op===Bu.Prod?"1.0":"0.0",i=o?a:`getX(${gA(s,"coords",this.op)})`,p=this.outputShape[this.outputShape.length-1],
// u is the GLSL guard for reading a neighbour, c the neighbour index.
u="",c="";o?(u=n?`end != ${p-1}`:"end != 0",c=n?"end + 1":"end - 1"):(u=n?`end + pow2 < ${p}`:"end >= pow2",c=n?"end + pow2":"end - pow2"),this.userCode=`
void main() {
${_e(s)} coords = getOutputCoords();
int end = ${xA(s,"coords",this.op)};
float val = ${i};
int pow2 = int(pow(2.0, index));
if (${u}) {
int idx = ${c};
${xA(s,"coords",this.op)} = idx;
val ${this.op}= getX(${gA(s,"coords",this.op)});
}
setOutput(val);
}
`}};function gA(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.x, ${e}.y`;if(r===3)return`${e}.x, ${e}.y, ${e}.z`;if(r===4)return`${e}.x, ${e}.y, ${e}.z, ${e}.w`;throw new Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function xA(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.y`;if(r===3)return`${e}.z`;if(r===4)return`${e}.w`;throw new Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function zh(r,e,t,o,n,s){let a=e.shape.length,i=I.getAxesPermutation([o],a),p=e;i!=null&&(p=xt({inputs:{x:e},backend:t,attrs:{perm:i}}));let u=I.getInnerMostAxes(1,a)[0];if(u!==a-1)throw new Error(`WebGL cumprod shader expects an inner-most axis=${e.shape.length-1} but got axis=${o}`);let c=p.shape[u],l=Rt({inputs:{x:p},backend:t});for(let m=0;m<=Math.ceil(Math.log2(c))-1;m++){let f=new Ml(r,p.shape,!1,s),d=[[m]],h=l;l=t.runWebGLProgram(f,[l],l.dtype,d),t.disposeIntermediateTensorInfo(h)}if(n){let m=new Ml(r,p.shape,n,s),f=l;l=t.runWebGLProgram(m,[l],l.dtype),t.disposeIntermediateTensorInfo(f)}if(i!=null){let m=I.getUndoAxesPermutation(i),f=xt({inputs:{x:l},backend:t,attrs:{perm:m}});return t.disposeIntermediateTensorInfo(l),t.disposeIntermediateTensorInfo(p),f}return l}function yQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return zh(Bu.Prod,n,t,s,a,i)}var yA={kernelName:hn,backendName:"webgl",kernelFunc:yQ};function bQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return zh(Bu.Sum,n,t,s,a,i)}var bA={kernelName:gn,backendName:"webgl",kernelFunc:bQ};function CQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,weights:s}=e,{size:a,binaryOutput:i}=o;if(n.shape.length===1){let p=t.readSync(n.dataId),u=t.readSync(s.dataId),c=oh(p,u,s.dtype,s.shape,a);return t.makeTensorInfo([a],s.dtype,c)}else if(n.shape.length===2){let p=t.bufferSync(n),u=t.bufferSync(s),c=a$(p,u,a,i);return t.makeTensorInfo(c.shape,s.dtype,c.values)}throw new Error(`Error in denseBincount: input must be at most rank 2, but got 
rank${n.shape.length}.`)}var CA={kernelName:dp,backendName:"webgl",kernelFunc:CQ};var Wh=class{constructor(e,t,o){this.variableNames=["x"],this.outputShape=[],this.outputShape=e,this.blockSize=t,this.dataFormat=o,this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int h = ${this.getHeightCoordString()};
int w = ${this.getWidthCoordString()};
int d = ${this.getDepthCoordString()};
int in_h = h / ${t};
int offset_h = imod(h, ${t});
int in_w = w / ${t};
int offset_w = imod(w, ${t});
int offset_d = (offset_h * ${t} + offset_w) *
${this.getOutputDepthSize()};
int in_d = d + offset_d;
float result = ${this.getInputSamplingString()};
setOutput(result);
}
`}
// The five accessors below return GLSL fragments (or, for
// getOutputDepthSize, a number) that depend on whether dataFormat is NHWC.
// GLSL expression for the output height coordinate.
getHeightCoordString(){return this.dataFormat==="NHWC"?"coords[1]":"coords[2]"}
// GLSL expression for the output width coordinate.
getWidthCoordString(){return this.dataFormat==="NHWC"?"coords[2]":"coords[3]"}
// GLSL expression for the output depth coordinate.
getDepthCoordString(){return this.dataFormat==="NHWC"?"coords[3]":"coords[1]"}
// Size of the depth axis of the output shape.
getOutputDepthSize(){return this.dataFormat==="NHWC"?this.outputShape[3]:this.outputShape[1]}
// GLSL call that samples the input at the computed input coordinates.
getInputSamplingString(){return this.dataFormat==="NHWC"?"getX(b, in_h, in_w, in_d)":"getX(b, in_d, in_h, in_w)"}};
/**
 * Kernel function (registered in IA): derives the output shape from the
 * input shape, attrs.blockSize and attrs.dataFormat (height and width are
 * multiplied by blockSize, depth is divided by blockSize squared) and runs
 * program Wh on x, keeping the dtype of x.
 */
function IQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockSize:s,dataFormat:a}=o,i=n.shape[0],p=a==="NHWC"?n.shape[1]:n.shape[2],u=a==="NHWC"?n.shape[2]:n.shape[3],c=a==="NHWC"?n.shape[3]:n.shape[1],l=p*s,m=u*s,f=c/(s*s),d=a==="NHWC"?[i,l,m,f]:[i,f,l,m],h=new Wh(d,s,a);return t.runWebGLProgram(h,[n],n.dtype)}
// Kernel config binding IQ to kernel name yn on the webgl backend.
var IA={kernelName:yn,backendName:"webgl",kernelFunc:IQ};
/**
 * Shader program description for an unpacked depthwise 2D convolution of x
 * with W. Pads, strides, dilations and input dims are supplied at run time
 * through customUniforms; the filter size and the channel multiplier are
 * baked into the generated source.
 */
var Ic=class{
/**
 * @param e conv info object; reads outShape, filterHeight, filterWidth,
 *     outChannels and inChannels.
 * @param t when true, a bias input is added and summed into the result.
 * @param o activation snippet (GLSL source) or null for no activation.
 * @param n when true, the activation reads preluActivationWeights.
 * @param s when true, the activation reads leakyreluAlpha.
 */
constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=e.outShape,this.enableShapeUniforms=lt(this.outputShape.length);let a=e.filterHeight,i=e.filterWidth,
// p is the number of output channels produced per input channel.
p=e.outChannels/e.inChannels,u="",c="";o&&(n?u=`float activation(float a) {
float b = getPreluActivationWeightsAtOutCoords();
${o}
}`:s?u=`float activation(float a) {
float b = getLeakyreluAlphaAtOutCoords();
${o}
}`:u=`
float activation(float x) {
${o}
}
`,c="result = activation(result);");
// Optional inputs are appended in the fixed order bias, prelu weights,
// leakyrelu alpha.
let l=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
${u}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int d1 = d2 / ${p};
int q = d2 - d1 * ${p};
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// Convolve x(?, ?, d1) with w(:, :, d1, q) to get y(yR, yC, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
// TO DO(dsmilkov): Flatten the two for loops and vec4 the operations.
for (int wR = 0; wR < ${a}; wR++) {
int xR = xRCorner + wR * dilations[0];
if (xR < 0 || xR >= inDims[0]) {
continue;
}
for (int wC = 0; wC < ${i}; wC++) {
int xC = xCCorner + wC * dilations[1];
if (xC < 0 || xC >= inDims[1]) {
continue;
}
float xVal = getX(batch, xR, xC, d1);
float wVal = getW(wR, wC, d1, q);
dotProd += xVal * wVal;
}
}
float result = dotProd;
${l}
${c}
setOutput(result);
}
`}};var wc=class{constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=e.outShape,this.enableShapeUniforms=lt(this.outputShape.length);let a=e.outChannels/e.inChannels,i=e.padInfo.left,p=e.strideWidth,u=e.dilationWidth,c=e.filterHeight,l=e.filterWidth,m=l,f=`
int xR; int xC; int xCOffset;
vec4 wTexel; vec4 previous; vec4 final;`;for(let y=0;y<l;y++)f+=`
vec4 xTexelC${y*2};
int xTexelC${y*2}Ready;
vec4 xTexelC${y*2+1};
int xTexelC${y*2+1}Ready;
vec4 xC${y};`;f+=`
for (int r = 0; r < ${c}; r++) {
`;for(let y=0;y<l;y++)f+=`
xTexelC${y*2} = vec4(0.0);
xTexelC${y*2}Ready = 0;
xTexelC${y*2+1} = vec4(0.0);
xTexelC${y*2+1}Ready = 0;
xC${y} = vec4(0.0);`;f+=`
xR = xRCorner + r * dilations[0];
if (xR >=0 && xR < inDims[0]) {
`;for(let y=0;y<(m+1)/2;y++){let b=y*2;if(f+=`
xC = xCCorner + ${b*u};
`,p===1){if(b<l&&(i%2===1?(f+=`
xCOffset = xC + 1;
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
`,u===1&&b>0?f+=`
xC${b} = vec4(xTexelC${b-2}.zw, xTexelC${b}.xy);
`:f+=`
xCOffset = xC + 1 - 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
previous.zw = vec2(0.0);
}
xC${b} = vec4(previous.zw, xTexelC${b}.xy);
} else {
xC${b} = vec4(0.0, 0.0, xTexelC${b}.xy);
}
`):f+=`
if (xC >= 0 && xC < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
xC${b} = xTexelC${b};
`,b+1<l)){let C=i%2===0?x.nearestLargerEven(u):u;u%2===0&&i%2===1||u%2!==0&&i%2!==1?(f+=`
xCOffset = xC + imod(pads[1], 2) + ${C};
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
`,u>1?f+=`
xCOffset -= 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
xC${b+1} = vec4(previous.zw, xTexelC${b+1}.xy);
} else {
xC${b+1} = vec4(0.0, 0.0, xTexelC${b+1}.xy);
}
`:f+=`
xC${b+1} = vec4(xTexelC${b}.zw, xTexelC${b+1}.xy);
`):C===1?f+=`
xC${b+1} = xTexelC${b};
`:f+=`
xCOffset = xC + ${C};
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
xC${b+1} = xTexelC${b+1};
`}}else b<l&&(i%2===1?(f+=`
xCOffset = xC + 1 - strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xC + 1, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xC + 2 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
xC${b} = vec4(xTexelC${b}.zw, xTexelC${b+1}.zw);
`,b+1<l&&(f+=`
final = vec4(0.0);
xCOffset = xC + 1 + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1]) {
final = getX(batch, xR, xCOffset, d1);
}
xC${b+1} = vec4(xTexelC${b+1}.xy, final.xy);
`)):(f+=`
if(xC >= 0 && xC < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
xCOffset = xC + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.);
}
xTexelC${b+1}Ready = 1;
}
xC${b} = vec4(
xTexelC${b}.xy, xTexelC${b+1}.xy);
`,b+1<l&&(f+=`
xC${b+1} = vec4(xTexelC${b}.zw, xTexelC${b+1}.zw);
`)));b<l&&(f+=`
wTexel = getW(r, ${b}, d1, q);
dotProd += xC${b} * vec4(wTexel.xz, wTexel.xz);
`,b+1<l&&(f+=`
wTexel = getW(r, ${b+1}, d1, q);
dotProd += xC${b+1} * vec4(wTexel.xz, wTexel.xz);
`))}f+=`
}
`,f+=`
}
`;let d="",h="";o&&(n?d=`vec4 activation(vec4 a) {
vec4 b = getPreluActivationWeightsAtOutCoords();
${o}
}`:s?d=`vec4 activation(vec4 a) {
vec4 b = getLeakyreluAlphaAtOutCoords();
${o}
}`:d=`vec4 activation(vec4 x) {
${o}
}`,h="result = activation(result);");let g=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
${d}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int d1 = d2 / ${a};
int q = d2 - d1 * ${a};
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
//intialize dotProd with a small epsilon seems to reduce GPU accuracy loss.
vec4 dotProd = vec4(0.000000000000001);
${f}
vec4 result = dotProd - vec4(0.000000000000001);
${g}
${h}
setOutput(result);
}
`}};function wQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dilations:p,dimRoundingMode:u}=o,c=p;c==null&&(c=[1,1]),x.assert(I.eitherStridesOrDilationsAreOne(a,c),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${a} and dilations '${c}'`);let l=I.computeConv2DInfo(n.shape,s.shape,a,c,i,u,!0),m;P().getBool("WEBGL_PACK_DEPTHWISECONV")&&l.strideWidth<=2&&l.outChannels/l.inChannels===1?m=new wc(l):m=new Ic(l);let f=[[l.padInfo.top,l.padInfo.left],[l.strideHeight,l.strideWidth],[l.dilationHeight,l.dilationWidth],[l.inHeight,l.inWidth]];return t.runWebGLProgram(m,[n,s],"float32",f)}var wA={kernelName:bn,backendName:"webgl",kernelFunc:wQ};var Uh=class{constructor(e){this.variableNames=["x","dy"],this.outputShape=e.filterShape;let t=e.strideHeight,o=e.strideWidth,n=e.padInfo.top,s=e.padInfo.left,a=e.outChannels/e.inChannels;this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int wR = coords.x;
int wC = coords.y;
int d1 = coords.z;
int dm = coords.w;
int d2 = d1 * ${a} + dm;
float dotProd = 0.0;
// TO DO: Vec4 over the batch size
for (int b = 0; b < ${e.batchSize}; b++) {
for (int yR = 0; yR < ${e.outHeight}; yR++) {
int xR = wR + yR * ${t} - ${n};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int yC = 0; yC < ${e.outWidth}; yC++) {
int xC = wC + yC * ${o} - ${s};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float dyValue = getDy(b, yR, yC, d2);
float xValue = getX(b, xR, xC, d1);
dotProd += (xValue * dyValue);
}
}
}
setOutput(dotProd);
}
`}},Gh=class{constructor(e){this.variableNames=["dy","W"],this.outputShape=e.inShape;let t=e.filterHeight,o=e.filterWidth,n=e.strideHeight,s=e.strideWidth,a=t-1-e.padInfo.top,i=o-1-e.padInfo.left,p=e.outChannels/e.inChannels;this.userCode=`
const ivec2 pads = ivec2(${a}, ${i});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d1 = coords[3];
ivec2 dyCorner = coords.yz - pads;
int dyRCorner = dyCorner.x;
int dyCCorner = dyCorner.y;
float dotProd = 0.0;
for (int wR = 0; wR < ${t}; wR++) {
float dyR = float(dyRCorner + wR) / ${n}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
int wRPerm = ${t} - 1 - wR;
for (int wC = 0; wC < ${o}; wC++) {
float dyC = float(dyCCorner + wC) / ${s}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${o} - 1 - wC;
// TO DO: Vec4 over the channelMul
for (int dm = 0; dm < ${p}; dm++) {
int d2 = d1 * ${p} + dm;
float xValue = getDy(batch, idyR, idyC, d2);
float wValue = getW(wRPerm, wCPerm, d1, dm);
dotProd += xValue * wValue;
}
}
}
setOutput(dotProd);
}
`}};function SQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,dy:s}=e,{strides:a,dilations:i,pad:p,dimRoundingMode:u,filterShape:c}=o,l=I.computeConv2DInfo(n.shape,c,a,i,p,u,!0),m=new Uh(l);return t.runWebGLProgram(m,[n,s],"float32")}var SA={kernelName:hp,backendName:"webgl",kernelFunc:SQ};function vQ(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,filter:s}=e,{strides:a,dilations:i,pad:p,dimRoundingMode:u,inputShape:c}=o,l=I.computeConv2DInfo(c,s.shape,a,i,p,u,!0),m=new Gh(l);return t.runWebGLProgram(m,[n,s],"float32")}var vA={kernelName:gp,backendName:"webgl",kernelFunc:vQ};var Hh=class{constructor(e){this.variableNames=["X"],this.outputShape=[e,e],this.userCode=`
void main() {
ivec2 coords = getOutputCoords();
float val = coords[0] == coords[1] ? getX(coords[0]) : 0.0;
setOutput(val);
}
`}};function kQ(r){let{inputs:e,backend:t}=r,{x:o}=e,n=[...o.shape,...o.shape],s=x.sizeFromShape(o.shape),a=J({inputs:{x:o},backend:t,attrs:{shape:[s]}}),i=new Hh(s),p=t.runWebGLProgram(i,[a],a.dtype),u=J({inputs:{x:p},backend:t,attrs:{shape:n}});return t.disposeIntermediateTensorInfo(a),t.disposeIntermediateTensorInfo(p),u}var kA={kernelName:xp,backendName:"webgl",kernelFunc:kQ};var qh=class{constructor(e){this.variableNames=["x","W"],this.outputShape=e.outShape;let{inHeight:t,inWidth:o,padInfo:n,strideHeight:s,strideWidth:a,filterHeight:i,filterWidth:p,dilationHeight:u,dilationWidth:c}=e,{top:l,left:m}=n;this.userCode=`
const ivec2 strides = ivec2(${s}, ${a});
const ivec2 pads = ivec2(${l}, ${m});
const float neg_infinity = -3.4e38;
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
int d1 = coords.w;
ivec2 outTopLeftCorner =
coords.yz * strides - pads;
int hBeg = outTopLeftCorner.x;
int wBeg = outTopLeftCorner.y;
float curVal = neg_infinity;
for (int h = 0; h < ${i}; h++) {
int hIn = hBeg + h * ${u};
if (hIn >= 0 && hIn < ${t}) {
for (int w = 0; w < ${p}; w++) {
int wIn = wBeg + w * ${c};
if (wIn >= 0 && wIn < ${o}) {
float xVal = getX(batch, hIn, wIn, d1);
float wVal = getW(h, w, d1);
float val = xVal + wVal;
if (val > curVal) {
curVal = val;
}
}
}
}
}
float result = curVal;
setOutput(result);
}
`}};function TQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dilations:p}=o,u=I.computeDilation2DInfo(n.shape,s.shape,a,i,"NHWC",p),c,l=new qh(u);c=t.runWebGLProgram(l,[n,s],"float32");let m=J({inputs:{x:c},backend:t,attrs:{shape:u.outShape}});return t.disposeIntermediateTensorInfo(c),m}var TA={kernelName:yp,backendName:"webgl",kernelFunc:TQ};function NQ(r){let{inputs:e,backend:t,attrs:o}=r,{equation:n}=o,s=e,{allDims:a,summedDims:i,idDims:p}=I.decodeEinsumEquation(n,s.length);I.checkEinsumDimSizes(a.length,p,s);let{path:u,steps:c}=I.getEinsumComputePath(i,p),l=c.length,m=null,f=a.length,d=[];for(let h=0;h<l;++h){for(let g of c[h]){let{permutationIndices:y,expandDims:b}=I.getEinsumPermutation(f,p[g]),C;I.isIdentityPermutation(y)?C=s[g]:(C=xt({inputs:{x:s[g]},backend:t,attrs:{perm:y}}),d.push(C));let w=C.shape.slice();for(let k=0;k<b.length;++k)w.splice(b[k],0,1);x.arraysEqual(C.shape,w)||(C=J({inputs:{x:C},backend:t,attrs:{shape:w}}),d.push(C)),m===null?m=C:(m=Pl({inputs:{a:C,b:m},backend:t}),d.push(m))}h<l-1&&(u[h]>=0&&(m=Ou({inputs:{x:m},backend:t,attrs:{axis:u[h]-(a.length-f),keepDims:!1}}),d.push(m)),f--)}for(let h of d)h!==m&&t.disposeIntermediateTensorInfo(h);return m}var NA={kernelName:Xa,backendName:"webgl",kernelFunc:NQ};var _Q="return (x >= 0.0) ? x : (exp(x) - 1.0);",EQ=`
vec4 result;
result.r = (x.r >= 0.0) ? x.r : (exp(x.r) - 1.0);
result.g = (x.g >= 0.0) ? x.g : (exp(x.g) - 1.0);
result.b = (x.b >= 0.0) ? x.b : (exp(x.b) - 1.0);
result.a = (x.a >= 0.0) ? x.a : (exp(x.a) - 1.0);
return result;
`,$Q=he({opSnippet:_Q,packedOpSnippet:EQ}),_A={kernelName:In,backendName:"webgl",kernelFunc:$Q};var RQ="return (b >= 1.0) ? a : a * (b + 1.0);",AQ=`
vec4 bGTEZero = vec4(greaterThanEqual(b, vec4(0.)));
return (bGTEZero * a) + ((vec4(1.0) - bGTEZero) * (a * (b + vec4(1.0))));
`,FQ=r=>{let{inputs:e,backend:t}=r,{dy:o,y:n}=e,s=P().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new Ko(AQ,o.shape,n.shape):new _o(RQ,o.shape,n.shape);return t.runWebGLProgram(s,[o,n],o.dtype)},EA={kernelName:Pm,backendName:"webgl",kernelFunc:FQ};var DQ=`
return vec4(equal(a, b));
`,PQ="return float(a == b);",OQ=ot({opSnippet:PQ,packedOpSnippet:DQ,dtype:"bool",cpuKernelImpl:c$}),$A={kernelName:oo,backendName:"webgl",kernelFunc:OQ};var MQ=`
// Error function is calculated approximately with elementary function.
// See "Handbook of Mathematical Functions with Formulas,
// Graphs, and Mathematical Tables", Abramowitz and Stegun.
float p = ${I.ERF_P};
float a1 = ${I.ERF_A1};
float a2 = ${I.ERF_A2};
float a3 = ${I.ERF_A3};
float a4 = ${I.ERF_A4};
float a5 = ${I.ERF_A5};
float sign = sign(x);
x = abs(x);
float t = 1.0 / (1.0 + p * x);
return sign * (1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x));
`,LQ=he({opSnippet:MQ}),RA={kernelName:Gi,backendName:"webgl",kernelFunc:LQ};var BQ=jo+`
return exp(x);
`,VQ=`
vec4 result = exp(x);
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,Ow=he({opSnippet:BQ,packedOpSnippet:VQ,cpuKernelImpl:l$,dtype:"float32"}),AA={kernelName:no,backendName:"webgl",kernelFunc:Ow};function Kh(r){let{inputs:e,attrs:t,backend:o}=r,{dim:n}=t,{input:s}=e,a=s.shape.length,i=s.shape.slice(),p=n;return n<0&&(x.assert(-(a+1)<=n,()=>`Axis must be in the interval [${-(a+1)}, ${a}]`),p=a+n+1),i.splice(p,0,1),J({inputs:{x:s},backend:o,attrs:{shape:i}})}var FA={kernelName:xs,backendName:"webgl",kernelFunc:Kh};var DA="return exp(x) - 1.0;",zQ=he({opSnippet:DA,packedOpSnippet:DA,cpuKernelImpl:m$}),PA={kernelName:wn,backendName:"webgl",kernelFunc:zQ};var Ll=class{constructor(e,t,o){this.variableNames=["real","imag"];let n=t[1];this.outputShape=t;let s=o?`2.0 * ${Math.PI}`:`-2.0 * ${Math.PI}`,a=o?`${n}.0`:"1.0",i;if(e==="real")i="return real * expR - imag * expI;";else if(e==="imag")i="return real * expI + imag * expR;";else throw new Error(`FFT component must be either "real" or "imag", got ${e}.`);this.userCode=`
const float exponentMultiplier = ${s};
float unaryOpComplex(float real, float expR, float imag, float expI) {
${i}
}
float mulMatDFT(int batch, int index) {
float indexRatio = float(index) / float(${n});
float exponentMultiplierTimesIndexRatio =
exponentMultiplier * indexRatio;
float result = 0.0;
for (int i = 0; i < ${n}; i++) {
// x = (-2|2 * PI / N) * index * i;
float x = exponentMultiplierTimesIndexRatio * float(i);
float expR = cos(x);
float expI = sin(x);
float real = getReal(batch, i);
float imag = getImag(batch, i);
result +=
unaryOpComplex(real, expR, imag, expI) / ${a};
}
return result;
}
void main() {
ivec2 coords = getOutputCoords();
setOutput(mulMatDFT(coords[0], coords[1]));
}
`}};function jh(r,e,t){let o=t.texData.get(r.dataId),n=x.sizeFromShape(r.shape),s=r.shape[r.shape.length-1],a=n/s,i=J({inputs:{x:r},backend:t,attrs:{shape:[a,s]}}),p=i.shape,u=new Ll("real",p,e),c=new Ll("imag",p,e),l=[{dataId:o.complexTensorInfos.real.dataId,dtype:o.complexTensorInfos.real.dtype,shape:p},{dataId:o.complexTensorInfos.imag.dataId,dtype:o.complexTensorInfos.imag.dtype,shape:p}],m=t.runWebGLProgram(u,l,"float32"),f=t.runWebGLProgram(c,l,"float32"),d=Ar({inputs:{real:m,imag:f},backend:t});t.disposeIntermediateTensorInfo(m),t.disposeIntermediateTensorInfo(f);let h=J({inputs:{x:d},backend:t,attrs:{shape:r.shape}});return t.disposeIntermediateTensorInfo(i),t.disposeIntermediateTensorInfo(d),h}function WQ(r){let{inputs:e,backend:t}=r,{input:o}=e;return jh(o,!1,t)}var OA={kernelName:bp,backendName:"webgl",kernelFunc:WQ};var Xh=class{constructor(e,t){this.outputShape=[],this.customUniforms=[{name:"value",type:"float"}],this.variableNames=["x"],this.outputShape=e,this.userCode=`
void main() {
// Input can be obtained from uniform value.
setOutput(value);
}
`}};function Ba(r){let{backend:e,attrs:t}=r,{shape:o,value:n}=t,{dtype:s}=t;if(s=s||x.inferDtype(n),s==="string"){let a=x.getArrayFromDType(s,x.sizeFromShape(o));return a.fill(n),e.makeTensorInfo(o,s,a)}else{let a=new Xh(o,n),i=[[n]];return e.runWebGLProgram(a,[],s,i)}}var MA={kernelName:ys,backendName:"webgl",kernelFunc:Ba};var Yh=class{constructor(e){this.variableNames=["Image"],this.outputShape=[];let t=e[2];this.outputShape=e,this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int x = coords[2];
int coordX = ${t} - x - 1;
float outputValue;
if(coordX >= 0 && coordX < ${t}) {
outputValue = getImage(coords[0], coords[1], coordX, coords[3]);
} else {
outputValue = getImage(coords[0], coords[1], coords[2], coords[3]);
}
setOutput(outputValue);
}
`}};var LA={kernelName:Sn,backendName:"webgl",kernelFunc:({inputs:r,backend:e})=>{let{image:t}=r,o=e,n=new Yh(t.shape);return o.runWebGLProgram(n,[t],t.dtype)}};var BA="return floor(x);",UQ=he({opSnippet:BA,packedOpSnippet:BA,cpuKernelImpl:f$}),VA={kernelName:so,backendName:"webgl",kernelFunc:UQ};var GQ=`
float s = sign(a) * sign(b);
int ia = round(a);
int ib = round(b);
if (ib != 0) {
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
return float(idiv(ia, ib, s));
} else {
return NAN;
}
`,HQ=`
ivec4 ia = round(a);
ivec4 ib = round(b);
bvec4 cond = notEqual(ib, ivec4(0));
ivec4 result = ivec4(0);
vec4 s = sign(a) * sign(b);
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
if (cond[0]) {
result[0] = idiv(ia[0], ib[0], s[0]);
}
if (cond[1]) {
result[1] = idiv(ia[1], ib[1], s[1]);
}
if (cond[2]) {
result[2] = idiv(ia[2], ib[2], s[2]);
}
if (cond[3]) {
result[3] = idiv(ia[3], ib[3], s[3]);
}
return vec4(result);
`,qQ=ot({opSnippet:GQ,packedOpSnippet:HQ,dtype:"int32"}),zA={kernelName:vn,backendName:"webgl",kernelFunc:qQ};var Qh=class{constructor(e){this.variableNames=["A"];let t=Ct(),[o,n]=e;this.outputShape=e,this.userCode=`
void main() {
ivec3 coords = getOutputCoords();
int texR = coords[0];
int texC = coords[1];
int depth = coords[2];
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${n}.0, ${o}.0);
vec4 values = ${t.texture2D}(A, uv);
float value;
if (depth == 0) {
value = values.r;
} else if (depth == 1) {
value = values.g;
} else if (depth == 2) {
value = values.b;
} else if (depth == 3) {
value = values.a;
}
setOutput(floor(value * 255.0 + 0.5));
}
`}};var Zh=class{constructor(e){this.variableNames=["A"],this.packedInputs=!1,this.packedOutput=!0;let t=Ct(),[o,n]=e;this.outputShape=e,this.userCode=`
void main() {
ivec3 coords = getOutputCoords();
int texR = coords[0];
int texC = coords[1];
int depth = coords[2];
vec4 result = vec4(0.);
for(int row=0; row<=1; row++) {
for(int col=0; col<=1; col++) {
texC = coords[1] + row;
depth = coords[2] + col;
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${n}.0, ${o}.0);
vec4 values = ${t.texture2D}(A, uv);
float value;
if (depth == 0) {
value = values.r;
} else if (depth == 1) {
value = values.g;
} else if (depth == 2) {
value = values.b;
} else if (depth == 3) {
value = values.a;
}
result[row * 2 + col] = floor(value * 255.0 + 0.5);
}
}
${t.output} = result;
}
`}};
// Kernel config for KQ, plus two module-level values KQ maintains:
//  - Sc: lazily created 2D canvas context shared between calls,
//  - Mw: the CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU flag value Sc was created with.
var WA={kernelName:Zi,backendName:"webgl",kernelFunc:KQ},Sc,Mw=P().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");
/**
 * Kernel function that turns a pixel source into an int32 tensor of shape
 * [height, width, numChannels].
 *
 * HTMLVideoElement / HTMLImageElement sources are first drawn into the shared
 * canvas (Sc); the context is (re)created when it does not exist yet or when
 * the CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU flag changed since it was made.
 * The pixels are uploaded into a temporary texture marked with PIXELS usage,
 * decoded by program Zh (WEBGL_PACK enabled) or Qh, and the temporary data is
 * disposed before returning.
 *
 * @param r kernel invocation ({inputs: {pixels}, backend, attrs: {numChannels}})
 */
function KQ(r){let{inputs:e,backend:t,attrs:o}=r,{pixels:n}=e,{numChannels:s}=o,a=typeof HTMLVideoElement!="undefined"&&n instanceof HTMLVideoElement,i=typeof HTMLImageElement!="undefined"&&n instanceof HTMLImageElement,[p,u]=a?[n.videoWidth,n.videoHeight]:[n.width,n.height],c=[u,p],l=[u,p,s];if(i||a){let h=P().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");(Sc==null||h!==Mw)&&(Mw=h,Sc=document.createElement("canvas").getContext("2d",{willReadFrequently:Mw})),Sc.canvas.width=p,Sc.canvas.height=u,Sc.drawImage(n,0,0,p,u),n=Sc.canvas}let m=t.makeTensorInfo(c,"int32");t.texData.get(m.dataId).usage=ir.PIXELS,t.gpgpu.uploadPixelDataToTexture(t.getTexture(m.dataId),n);let f=P().getBool("WEBGL_PACK")?new Zh(l):new Qh(l),d=t.runWebGLProgram(f,[m],"int32");return t.disposeData(m.dataId),d}
/**
 * Kernel function for conv2d fused with optional bias and activation.
 *
 * One of four execution paths is taken, checked in this order:
 *  1. 1x1 filter with unit strides and dilations and SAME/VALID padding -> Dh;
 *  2. strideWidth <= 2, channelsLast layout and WEBGL_EXP_CONV enabled ->
 *     program Cc, with pads/strides/dilations/input size passed as uniforms;
 *  3. WEBGL_CONV_IM2COL enabled -> Ph;
 *  4. otherwise -> program bc.
 *
 * The inner helper `_` assembles the program inputs: for "NCHW" data a 1-D
 * bias / PReLU weight tensor whose length is not 1 is reshaped to [C, 1, 1],
 * and the leaky-ReLU alpha is wrapped in a scalar tensor. Everything pushed
 * onto `b`, together with the raw result, is disposed after the final
 * reshape to convInfo.outShape.
 *
 * @param r kernel invocation ({inputs, backend, attrs})
 */
function jQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dataFormat:c,dilations:l,dimRoundingMode:m,activation:f,leakyreluAlpha:d}=o,h=I.convertConv2DDataFormat(c),g=I.computeConv2DInfo(n.shape,s.shape,p,l,u,m,!1,h),y,b=[],C=a!=null,w=i!=null,k=f==="leakyrelu",_=()=>{let R=[n,s],A=(D,O)=>{if(O==="NCHW"&&D.shape.length===1&&D.shape[0]!==1){let M=J({inputs:{x:D},backend:t,attrs:{shape:[D.shape[0],1,1]}});return b.push(M),M}return D};if(C&&R.push(A(a,c)),w&&R.push(A(i,c)),k){let D=t.makeTensorInfo([],"float32",x.createScalarValue(d,"float32"));R.push(D),b.push(D)}return R};if(g.filterHeight===1&&g.filterWidth===1&&g.dilationHeight===1&&g.dilationWidth===1&&g.strideHeight===1&&g.strideWidth===1&&(g.padInfo.type==="SAME"||g.padInfo.type==="VALID"))y=Dh({x:n,filter:s,convInfo:g,backend:t,bias:a,activation:f,preluActivationWeights:i,leakyreluAlpha:d});else if(g.strideWidth<=2&&h==="channelsLast"&&P().getBool("WEBGL_EXP_CONV")){let R=f?Ma(f,!0):null,A=new 
Cc(g,C,R,w,k),D=[[g.padInfo.top,g.padInfo.left],[g.strideHeight,g.strideWidth],[g.dilationHeight,g.dilationWidth],[g.inHeight,g.inWidth]],O=_();y=t.runWebGLProgram(A,O,"float32",D)}else if(P().getBool("WEBGL_CONV_IM2COL"))y=Ph({x:n,filter:s,convInfo:g,backend:t,bias:a,activation:f,preluActivationWeights:i,leakyreluAlpha:d});else{let R=f?Ma(f,!1):null,A=new bc(g,C,R,w,k),D=_();y=t.runWebGLProgram(A,D,"float32")}let E=J({inputs:{x:y},backend:t,attrs:{shape:g.outShape}});return b.push(y),b.forEach(R=>t.disposeIntermediateTensorInfo(R)),E}var UA={kernelName:Do,backendName:"webgl",kernelFunc:jQ};
/**
 * Kernel function for depthwise conv2d fused with optional bias and
 * activation.
 *
 * Dilations default to [1, 1]. The packed program `wc` is used when
 * WEBGL_PACK_DEPTHWISECONV is enabled, strideWidth <= 2 and the channel
 * multiplier is 1; otherwise `Ic`. Bias and PReLU weights are appended to the
 * program inputs when present; for "leakyrelu" the alpha is wrapped in a
 * scalar tensor that is disposed after the program has run.
 *
 * @param r kernel invocation ({inputs, backend, attrs})
 */
function XQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dilations:c,dimRoundingMode:l,activation:m,leakyreluAlpha:f}=o,d=[],h=c;h==null&&(h=[1,1]),x.assert(I.eitherStridesOrDilationsAreOne(p,h),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${p} and dilations '${h}'`);let g=I.computeConv2DInfo(n.shape,s.shape,p,h,u,l,!0),y=P().getBool("WEBGL_PACK_DEPTHWISECONV")&&g.strideWidth<=2&&g.outChannels/g.inChannels===1,b=m?Ma(m,y):null,C=[n,s],w=a!=null,k=i!=null,_=m==="leakyrelu";if(w&&C.push(a),k&&C.push(i),_){let D=t.makeTensorInfo([],"float32",x.createScalarValue(f,"float32"));C.push(D),d.push(D)}let E;y?E=new wc(g,w,b,k,_):E=new Ic(g,w,b,k,_);let R=[[g.padInfo.top,g.padInfo.left],[g.strideHeight,g.strideWidth],[g.dilationHeight,g.dilationWidth],[g.inHeight,g.inWidth]],A=t.runWebGLProgram(E,C,"float32",R);return d.forEach(D=>t.disposeIntermediateTensorInfo(D)),A}var GA={kernelName:Po,backendName:"webgl",kernelFunc:XQ};
/**
 * Shader program that gathers slices by multi-dimensional index.
 *
 *  - e: number of index components per slice (stored as sliceDim),
 *  - t: multiplier applied to each index component when flattening,
 *  - o: output shape,
 *  - n: extent of each indexed dimension, used for the bounds check.
 *
 * A slice whose index has any component outside [0, n[i]) yields 0.0.
 */
var Jh=class{constructor(e,t,o,n){this.sliceDim=e,this.strides=t,this.paramsShape=n,this.variableNames=["x","indices"],this.outputShape=o;let s=_e(o.length),a=`
int index;`;for(let i=0;i<this.sliceDim;i++)a+=`
index = round(getIndices(coords[0], ${i}));
out_of_bounds = out_of_bounds || index < 0;
out_of_bounds = out_of_bounds || index >= ${this.paramsShape[i]};
flattenIndex += index * ${this.strides[i]};`;this.userCode=`
void main() {
${s} coords = getOutputCoords();
int flattenIndex = 0;
bool out_of_bounds = false;
${a}
setOutput(out_of_bounds ? 0.0 : getX(flattenIndex, coords[1]));
}
`}};function YQ(r){let{inputs:e,backend:t}=r,{params:o,indices:n}=e,s=n.shape,a=s[s.length-1],i=x.sizeFromShape(o.shape),[p,u,c,l]=I.prepareAndValidate(o,n),m=J({inputs:{x:n},backend:t,attrs:{shape:[u,a]}}),f=J({inputs:{x:o},backend:t,attrs:{shape:[x.sizeFromShape(o.shape)/c,c]}});if(t.shouldExecuteOnCPU([o,n])||o.dtype==="string"){let y=t.readSync(n.dataId),b=t.bufferSync(o),C=d$(y,b,o.dtype,u,a,c,l,o.shape,i);return t.makeTensorInfo(p,o.dtype,C.values)}let d=new Jh(a,l,[u,c],o.shape),h=t.runWebGLProgram(d,[f,m],f.dtype),g=J({inputs:{x:h},backend:t,attrs:{shape:p}});return t.disposeIntermediateTensorInfo(m),t.disposeIntermediateTensorInfo(f),t.disposeIntermediateTensorInfo(h),g}var HA={kernelName:Tn,backendName:"webgl",kernelFunc:YQ};var eg=class{constructor(e,t){this.variableNames=["A","indices"],this.outputShape=t,this.rank=t.length;let o=_e(this.rank),n=QQ(e,2);this.userCode=`
void main() {
${o} resRC = getOutputCoords();
int index = int(getIndices(resRC.x, resRC.z));
float inBounds = (index >= 0) && (index < ${e[2]}) ? 1.0 : 0.0;
setOutput(inBounds * getA(${n}));
}
`}};function QQ(r,e){let t=["resRC.x","resRC.y","resRC.z","resRC.w"],o=[];for(let n=0;n<r.length;n++)n===2?o.push("index"):o.push(`${t[n]}`);return o.join()}function Lw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,indices:s}=e,{axis:a,batchDims:i}=o,p=x.parseAxisParam(a,n.shape)[0];if(P().get("DEBUG")){let b=t.readSync(s.dataId),C=n.shape[p];for(let w=0;w<b.length;++w){let k=b[w];x.assert(k<=C-1&&k>=0,()=>`GatherV2: the index value ${k} is not in [0, ${C-1}]`)}}let u=I.segment_util.collectGatherOpShapeInfo(n,s,p,i),c=x.sizeFromShape(s.shape),l=[],m=J({inputs:{x:n},backend:t,attrs:{shape:[u.batchSize,u.outerSize,u.dimSize,u.sliceSize]}}),f=J({inputs:{x:s},backend:t,attrs:{shape:[u.batchSize,c/u.batchSize]}});l.push(m),l.push(f);let d=[u.batchSize,u.outerSize,c/u.batchSize,u.sliceSize];if(t.shouldExecuteOnCPU([n,s])||n.dtype==="string"){let b=t.bufferSync(f),C=t.bufferSync(m),w=h$(C,b,d);return l.forEach(k=>t.disposeIntermediateTensorInfo(k)),t.makeTensorInfo(u.outputShape,w.dtype,w.values)}let h=new eg(m.shape,d),g=t.runWebGLProgram(h,[m,f],m.dtype);l.push(g);let y=J({inputs:{x:g},backend:t,attrs:{shape:u.outputShape}});return l.forEach(b=>t.disposeIntermediateTensorInfo(b)),y}var qA={kernelName:bs,backendName:"webgl",kernelFunc:Lw};var ZQ="return float(a > b);",JQ=`
return vec4(greaterThan(a, b));
`,e7=ot({opSnippet:ZQ,packedOpSnippet:JQ,cpuKernelImpl:g$,dtype:"bool"}),KA={kernelName:ao,backendName:"webgl",kernelFunc:e7};var t7="return float(a >= b);",r7=`
return vec4(greaterThanEqual(a, b));
`,o7=ot({opSnippet:t7,packedOpSnippet:r7,dtype:"bool",cpuKernelImpl:x$}),jA={kernelName:io,backendName:"webgl",kernelFunc:o7};function n7(r){let{inputs:e,backend:t}=r,{input:o}=e;return jh(o,!0,t)}var XA={kernelName:Cp,backendName:"webgl",kernelFunc:n7};var s7="return float(!isnan(x) && !isinf(x));",a7=he({opSnippet:s7,dtype:"bool"}),YA={kernelName:Hi,backendName:"webgl",kernelFunc:a7};var i7="return float(isinf(x));",u7=he({opSnippet:i7,dtype:"bool"}),QA={kernelName:qi,backendName:"webgl",kernelFunc:u7};var p7="return float(isnan(x));",c7=he({opSnippet:p7,dtype:"bool"}),ZA={kernelName:ia,backendName:"webgl",kernelFunc:c7};var l7="return float(a < b);",m7=`
return vec4(lessThan(a, b));
`,f7=ot({opSnippet:l7,packedOpSnippet:m7,cpuKernelImpl:y$,dtype:"bool"}),JA={kernelName:po,backendName:"webgl",kernelFunc:f7};var d7="return float(a <= b);",h7=`
return vec4(lessThanEqual(a, b));
`,g7=ot({opSnippet:d7,packedOpSnippet:h7,cpuKernelImpl:b$,dtype:"bool"}),eF={kernelName:co,backendName:"webgl",kernelFunc:g7};function x7(r){let{backend:e,attrs:t}=r,{start:o,stop:n,num:s}=t,a=C$(o,n,s);return e.makeTensorInfo([a.length],"float32",a)}var tF={kernelName:Ip,backendName:"webgl",kernelFunc:x7};var y7=jo+`
return x < 0.0 ? 0./0. : log(x);
`,
// Packed log snippet: per channel, a NaN input is passed through and a
// negative input yields 0./0.
b7=`
vec4 result = log(x);
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : (x.r < 0.0 ? 0./0. : result.r);
result.g = isNaN.g ? x.g : (x.g < 0.0 ? 0./0. : result.g);
result.b = isNaN.b ? x.b : (x.b < 0.0 ? 0./0. : result.b);
result.a = isNaN.a ? x.a : (x.a < 0.0 ? 0./0. : result.a);
return result;
`,
// Unary log kernel built from the scalar and packed snippets, with a CPU
// implementation (I$) supplied to the factory.
C7=he({opSnippet:y7,packedOpSnippet:b7,cpuKernelImpl:I$}),rF={kernelName:lo,backendName:"webgl",kernelFunc:C7};
// log(1 + x) snippet, prefixed with the shared `jo` snippet.
var I7=jo+`
return log(1.0 + x);
`,w7=he({opSnippet:I7}),oF={kernelName:Ki,backendName:"webgl",kernelFunc:w7};
// Logical AND on float-encoded booleans: an operand counts as true when it
// is >= 1.0. Scalar snippet first, packed snippet second.
var S7="return float(a >= 1.0 && b >= 1.0);",v7=`
return vec4(
vec4(greaterThanEqual(a, vec4(1.0))) *
vec4(greaterThanEqual(b, vec4(1.0))));
`,k7=ot({opSnippet:S7,packedOpSnippet:v7,dtype:"bool"}),nF={kernelName:_n,backendName:"webgl",kernelFunc:k7};
// Logical NOT on a float-encoded boolean: 1.0 unless x >= 1.0.
var T7="return float(!(x >= 1.0));",N7=he({opSnippet:T7}),sF={kernelName:En,backendName:"webgl",kernelFunc:N7};
// Logical OR on float-encoded booleans. Scalar snippet first; the packed
// snippet adds both comparison masks and clamps the sum to 1.0.
var _7="return float(a >= 1.0 || b >= 1.0);",E7=`
return min(
vec4(greaterThanEqual(a, vec4(1.0))) +
vec4(greaterThanEqual(b, vec4(1.0))),
vec4(1.0));
`,
// Binary kernel built from the two logical-OR snippets; output dtype is "bool".
$7=ot({opSnippet:_7,packedOpSnippet:E7,dtype:"bool"}),aF={kernelName:ua,backendName:"webgl",kernelFunc:$7};
/**
 * Unpacked shader program for local response normalization across the last
 * axis of a 4-D input.
 *
 *  - e: input/output shape; e[3] - 1 is the highest valid channel index,
 *  - t: radius of the channel window (loop runs from -t to +t),
 *  - o: bias term, n: alpha multiplier, s: beta exponent.
 *
 * The scaling expression is specialised on beta: inversesqrt() for 0.5, a
 * reciprocal for 1, and exp(log(..) * -beta) otherwise.
 * NOTE(review): the general-beta string ends with its own ';', so the
 * generated statement ends in ';;' -- looks harmless, confirm intent.
 */
var tg=class{constructor(e,t,o,n,s){this.variableNames=["x"],this.outputShape=[];let a=t,i=e[3]-1;this.outputShape=e;let p,u=`float(${o}) + float(${n}) * sum`;s===.5?p=`inversesqrt(${u})`:s===1?p=`1.0/(${u})`:p=`exp(log(${u}) * float(-${s}));`,this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int r = coords[1];
int c = coords[2];
int d = coords[3];
float x = getX(b, r, c, d);
float sum = 0.0;
for (int j = -${a}; j <= ${a}; j++) {
int idx = d + j;
if (idx >= 0 && idx <= ${i}) {
float z = getX(b, r, c, idx);
sum += z * z;
}
}
float val = x * ${p};
setOutput(val);
}
`}};
/**
 * Packed-texture variant of the local response normalization program
 * (packedInputs and packedOutput are both set); it takes the same five
 * constructor arguments: shape, window radius, bias, alpha and beta.
 *
 * Each invocation produces a vec4 of results and carries the previous
 * channel's values between loop iterations in `cache`, so each iteration
 * fetches only the next channel.
 * NOTE(review): as in the unpacked variant, the general-beta expression
 * string carries a trailing ';' -- confirm intent.
 */
var rg=class{constructor(e,t,o,n,s){this.variableNames=["x"],this.outputShape=[],this.packedInputs=!0,this.packedOutput=!0;let a=t,i=e[3]-1;this.outputShape=e;let p,u=`float(${o}) + float(${n}) * sum`;s===.5?p=`inversesqrt(${u})`:s===1?p=`1.0/(${u})`:p=`exp(log(${u}) * float(-${s}));`,this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int b = coords.x;
int r = coords.y;
int c = coords.z;
int d = coords.w;
bool hasNextCol = d < ${this.outputShape[3]};
bool hasNextRow = c < ${this.outputShape[2]};
vec4 sum = vec4(0.);
vec4 xFragAtOutputCoords = getX(b, r, c, d);
vec4 xAtOutputCoords = vec4(
getChannel(xFragAtOutputCoords, vec2(c, d)),
hasNextCol ?
getChannel(xFragAtOutputCoords, vec2(c, d + 1)) : 0.0,
hasNextRow ?
getChannel(xFragAtOutputCoords , vec2(c + 1, d)) : 0.0,
(hasNextRow && hasNextCol) ?
getChannel(xFragAtOutputCoords, vec2(c + 1, d + 1)) : 0.0
);
int firstChannel = d - ${a};
vec2 cache = vec2(0.);
if(firstChannel >= 0){
vec4 firstChannelFrag = getX(b, r, c, firstChannel);
cache.x = getChannel(firstChannelFrag, vec2(c, firstChannel));
if(hasNextRow){
cache.y = getChannel(firstChannelFrag, vec2(c + 1, firstChannel));
}
}
ivec2 depth = ivec2(d, d + 1);
for (int j = - ${a}; j <= ${a}; j++) {
ivec2 idx = depth + j;
bvec2 aboveLowerBound = greaterThanEqual(idx, ivec2(0));
bvec2 belowUpperBound = lessThanEqual(idx, ivec2(${i}));
bool depthInRange = aboveLowerBound.x && belowUpperBound.x;
bool depthPlusOneInRange = aboveLowerBound.y && belowUpperBound.y;
if(depthInRange || depthPlusOneInRange){
vec4 z = vec4(0.);
vec4 xFragAtCurrentDepth;
z.xz = cache.xy;
if(depthPlusOneInRange && hasNextCol){
xFragAtCurrentDepth = idx.y != d ?
getX(b, r, c, idx.y) : xFragAtOutputCoords;
z.y = getChannel(xFragAtCurrentDepth, vec2(c, idx.y));
if(hasNextRow){
z.w = getChannel(xFragAtCurrentDepth, vec2(c + 1, idx.y));
}
}
cache.xy = z.yw;
sum += z * z;
}
}
vec4 result = xAtOutputCoords * ${p};
setOutput(result);
}
`}};var R7=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{depthRadius:s,bias:a,alpha:i,beta:p}=o,u=P().getBool("WEBGL_PACK_NORMALIZATION")?new rg(n.shape,s,a,i,p):new tg(n.shape,s,a,i,p);return t.runWebGLProgram(u,[n],n.dtype)},iF={kernelName:wp,backendName:"webgl",kernelFunc:R7};var og=class{constructor(e,t,o,n,s){this.variableNames=["inputImage","outputImage","dy"],this.outputShape=[],this.outputShape=e,this.depth=e[3],this.depthRadius=t,this.bias=o,this.alpha=n,this.beta=s,this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int r = coords[1];
int c = coords[2];
float result = 0.0;
for (int d = 0; d < ${this.depth}; ++d) {
int depthBegin = int(max(0.0, float(d - ${t})));
int depthEnd = int(min(float(${this.depth}),
float(d + ${t} + 1)));
const int MIN_DEPTH_BEGIN = 0;
const int MAX_DEPTH_END = ${this.depth};
float norm = 0.0;
for (int k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; ++k) {
if (k < depthBegin){
continue;
}
else if (k >= depthBegin && k < depthEnd) {
norm += getInputImage(b, r, c, k) * getInputImage(b, r, c, k);
}
else {
break;
}
}
norm = float(${n}) * norm + float(${o});
for(int k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; ++k){
if (k < depthBegin){
continue;
}
else if (k >= depthBegin && k < depthEnd){
float dyi = -2.0 * float(${n})
* float(${s})
* getInputImage(b ,r ,c, k) * getOutputImage(b, r, c, d)
/ norm;
if (k == d) {
dyi += pow(norm, -1.0 * ${s});
}
if (k == coords[3]) {
dyi *= getDy(b, r, c, d);
result += dyi;
}
}
else {
break;
}
}
}
setOutput(result);
}
`}};
/**
 * Kernel function that runs program `og` on (x, y, dy) with the
 * depthRadius / bias / alpha / beta attributes; the output keeps x's dtype.
 */
var A7=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n,y:s,dy:a}=e,{depthRadius:i,bias:p,alpha:u,beta:c}=o,l=new og(n.shape,i,p,u,c);return t.runWebGLProgram(l,[n,s,a],n.dtype)},uF={kernelName:Om,backendName:"webgl",kernelFunc:A7};
/**
 * GPU max-reduction helper.
 *
 * Views `r` as (batch, reduceSize) where reduceSize is the element count of
 * shape `e`, reduces it with qr(..., "max", ...), reshapes the result to `t`
 * and disposes both intermediates.
 *
 * @param r input tensor info
 * @param e shape of the reduced dimensions
 * @param t final output shape
 * @param o backend
 */
function pF(r,e,t,o){let n=x.sizeFromShape(e),a=x.sizeFromShape(r.shape)/n,i=J({inputs:{x:r},attrs:{shape:[a,n]},backend:o}),p=qr(i,r.dtype,"max",o),u=J({inputs:{x:p},attrs:{shape:t},backend:o});return o.disposeIntermediateTensorInfo(i),o.disposeIntermediateTensorInfo(p),u}
/**
 * Kernel function for the max reduction over `reductionIndices`.
 *
 * When the reduced axes are not already innermost (getAxesPermutation
 * returns a permutation) the input is transposed first: on the CPU path via
 * Du on the raw values, otherwise via _i. The reduction itself uses w$ on the
 * CPU path and pF on the GPU path. With keepDims the output shape is
 * expanded through expandShapeToKeepDim. The transposed copy, if one was
 * made, is disposed before returning.
 *
 * @param r kernel invocation ({inputs: {x}, backend, attrs: {reductionIndices, keepDims}})
 */
function Bw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{reductionIndices:s,keepDims:a}=o,i=n.shape.length,p=x.parseAxisParam(s,n.shape),u=p,c=I.getAxesPermutation(u,i),l=c!=null,m=t.shouldExecuteOnCPU([n]),f=n;if(l){if(m){let C=t.texData.get(f.dataId).values,w=new Array(i);for(let E=0;E<w.length;E++)w[E]=n.shape[c[E]];let k=Du(C,n.shape,n.dtype,c,w);f=t.makeTensorInfo(w,n.dtype);let _=t.texData.get(f.dataId);_.values=k}else f=_i(n,c,t);u=I.getInnerMostAxes(u.length,i)}I.assertAxesAreInnerMostDims("max",u,i);let[d,h]=I.computeOutAndReduceShapes(f.shape,u),g=d;a&&(g=I.expandShapeToKeepDim(d,p));let y;if(m){let C=t.texData.get(f.dataId).values,w=w$(C,x.sizeFromShape(h),g,n.dtype);y=t.makeTensorInfo(g,n.dtype);let k=t.texData.get(y.dataId);k.values=w}else y=pF(f,h,g,t);return l&&t.disposeIntermediateTensorInfo(f),y}var cF={kernelName:$n,backendName:"webgl",kernelFunc:Bw};
// Scalar element-wise maximum snippet, prefixed with the shared `gc` snippet.
var F7=gc+`
return max(a, b);
`,
// Packed element-wise maximum snippet; it builds a per-channel NaN mask and
// then splices in the shared `js` snippet before returning.
D7=`
vec4 result = vec4(max(a, b));
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
`+js+`
return result;
`,P7=ot({opSnippet:F7,packedOpSnippet:D7,cpuKernelImpl:S$}),lF={kernelName:mo,backendName:"webgl",kernelFunc:P7};function O7(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e;as(n,"maxPool");let{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1;x.assert(I.eitherStridesOrDilationsAreOne(a,u),()=>`Error in maxPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${u}'`);let c=I.computePool2DInfo(n.shape,s,a,u,i,p);if(c.filterWidth===1&&c.filterHeight===1&&x.arraysEqual(c.inShape,c.outShape))return Rt({inputs:{x:n},backend:t});let l=new us(c,"max",!1);return t.runWebGLProgram(l,[n],n.dtype)}var mF={kernelName:Rn,backendName:"webgl",kernelFunc:O7};function M7(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{filterSize:s,strides:a,pad:i,dataFormat:p,dimRoundingMode:u}=o,c=[1,1,1],l=I.computePool3DInfo(n.shape,s,a,c,i,u,p),m=new Ei(l,"max",!1);return t.runWebGLProgram(m,[n],n.dtype)}var fF={kernelName:Sp,backendName:"webgl",kernelFunc:M7};var ng=class{constructor(e){this.variableNames=["dy","maxPos"],this.outputShape=e.inShape;let t=e.strideHeight,o=e.strideWidth,n=e.dilationHeight,s=e.effectiveFilterHeight,a=e.effectiveFilterWidth,i=s-1-e.padInfo.top,p=a-1-e.padInfo.left,u=s*a-1;this.userCode=`
const ivec2 pads = ivec2(${i}, ${p});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 dyRCCorner = coords.yz - pads;
int dyRCorner = dyRCCorner.x;
int dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${s};
wR += ${n}) {
float dyR = float(dyRCorner + wR) / ${t}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${a}; wC++) {
float dyC = float(dyCCorner + wC) / ${o}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(b, idyR, idyC, d);
int maxPosValue = ${u} - int(getMaxPos(b, idyR, idyC, d));
// Get the current value, check it against the value from the
// position matrix.
int curPosValue = wR * ${a} + wC;
float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
dotProd += dyValue * mask;
}
}
setOutput(dotProd);
}
`}},sg=class{constructor(e){this.variableNames=["dy","maxPos"],this.outputShape=e.inShape;let t=e.strideDepth,o=e.strideHeight,n=e.strideWidth,s=e.dilationDepth,a=e.dilationHeight,i=e.dilationWidth,p=e.effectiveFilterDepth,u=e.effectiveFilterHeight,c=e.effectiveFilterWidth,l=p-1-e.padInfo.front,m=u-1-e.padInfo.top,f=c-1-e.padInfo.left,d=p*u*c-1;this.userCode=`
const ivec3 pads = ivec3(${l}, ${m}, ${f});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyDCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
// Convolve dy(?, ?, ?, ch) with pos mask(:, :, :, d) to get
// dx(xD, xR, xC, ch).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wD = 0; wD < ${p};
wD += ${s}) {
float dyD = float(dyDCorner + wD) / ${t}.0;
if (dyD < 0.0 || dyD >= ${e.outDepth}.0 || fract(dyD) > 0.0) {
continue;
}
int idyD = int(dyD);
for (int wR = 0; wR < ${u};
wR += ${a}) {
float dyR = float(dyRCorner + wR) / ${o}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 ||
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${c};
wC += ${i}) {
float dyC = float(dyCCorner + wC) / ${n}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(batch, idyD, idyR, idyC, ch);
int maxPosValue = ${d} -
int(getMaxPos(batch, idyD, idyR, idyC, ch));
// Get the current value, check it against the value from the
// position matrix.
int curPosValue =
wD * ${u} * ${c} +
wR * ${c} + wC;
float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
dotProd += dyValue * mask;
}
}
}
setOutput(dotProd);
}
`}};function L7(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s}=e,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=[1,1,1],m=I.computePool3DInfo(a.shape,i,p,l,u,c),f=new Ei(m,"max",!0),d=t.runWebGLProgram(f,[a],a.dtype),h=new sg(m),g=t.runWebGLProgram(h,[n,d],a.dtype);return t.disposeIntermediateTensorInfo(d),g}var dF={kernelName:Lm,backendName:"webgl",kernelFunc:L7};function B7(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s,output:a}=e,i=s;as([s,a],"maxPoolGrad");let{filterSize:p,strides:u,pad:c,dimRoundingMode:l}=o,m=I.computePool2DInfo(i.shape,p,u,1,c,l),f=!0,d=new us(m,"max",f),h=t.runWebGLProgram(d,[i],i.dtype),g=new ng(m),y=t.runWebGLProgram(g,[n,h],i.dtype);return t.disposeIntermediateTensorInfo(h),y}var hF={kernelName:Mm,backendName:"webgl",kernelFunc:B7};function gF(r,e,t,o){let n=new us(t,"max",!1),s=o.runWebGLProgram(n,[r],"float32");n=new us(t,"max",!0,!0,e);let a=o.runWebGLProgram(n,[r],"float32");return[s,a]}var xF={kernelName:vp,backendName:"webgl",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o}=r,{filterSize:n,strides:s,pad:a,includeBatchInIndex:i}=e,p=t;x.assert(o.shape.length===4,()=>`Error in maxPool: input must be rank 4 but got rank ${o.shape.length}.`);let u=[1,1];x.assert(I.eitherStridesOrDilationsAreOne(s,u),()=>`Error in maxPool: Either strides or dilations must be 1. 
Got strides ${s} and dilations '${u}'`);let c=I.computePool2DInfo(o.shape,n,s,u,a),[l,m]=gF(o,i,c,p);return[l,m]}};function yF(r,e,t,o){let n=x.sizeFromShape(e),a=x.sizeFromShape(r.shape)/n,i=J({inputs:{x:r},attrs:{shape:[a,n]},backend:o}),p=qr(i,"float32","mean",o),u=J({inputs:{x:p},attrs:{shape:t},backend:o});return o.disposeIntermediateTensorInfo(i),o.disposeIntermediateTensorInfo(p),u}var bF={kernelName:An,backendName:"webgl",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o}=r,{keepDims:n,axis:s}=e,a=t,i=o.shape.length,p=x.parseAxisParam(s,o.shape),u=p,c=I.getAxesPermutation(u,i),l=c!=null,m=a.shouldExecuteOnCPU([o]),f=[],d=o;if(l){if(m){let w=a.texData.get(d.dataId).values,k=new Array(i);for(let R=0;R<k.length;R++)k[R]=o.shape[c[R]];let _=Du(w,o.shape,o.dtype,c,k);d=a.makeTensorInfo(k,o.dtype);let E=a.texData.get(d.dataId);E.values=_}else d=_i(o,c,a);f.push(d),u=I.getInnerMostAxes(u.length,i)}I.assertAxesAreInnerMostDims("sum",u,i);let[h,g]=I.computeOutAndReduceShapes(d.shape,u),y=h;n&&(y=I.expandShapeToKeepDim(h,p));let b=yF(d,g,y,a);for(let C of f)a.disposeIntermediateTensorInfo(C);return b}};function V7(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o,i=n.shape.length,p=x.parseAxisParam(s,n.shape),u=p,c=I.getAxesPermutation(u,i),l=n;c!=null&&(l=xt({inputs:{x:n},backend:t,attrs:{perm:c}}),u=I.getInnerMostAxes(u.length,n.shape.length)),I.assertAxesAreInnerMostDims("min",u,i);let[m,f]=I.computeOutAndReduceShapes(l.shape,u),d=x.sizeFromShape(f),h=J({inputs:{x:l},backend:t,attrs:{shape:[-1,d]}}),g=qr(h,h.dtype,"min",t),y;if(a){let b=I.expandShapeToKeepDim(m,p);y=J({inputs:{x:g},backend:t,attrs:{shape:b}})}else y=J({inputs:{x:g},backend:t,attrs:{shape:m}});return t.disposeIntermediateTensorInfo(h),t.disposeIntermediateTensorInfo(g),c!=null&&t.disposeIntermediateTensorInfo(l),y}var CF={kernelName:Fn,backendName:"webgl",kernelFunc:V7};var z7=gc+`
return min(a, b);
`,W7=`
vec4 result = vec4(min(a, b));
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
`+js+`
return result;
`,
  // Element-wise minimum kernel built from the z7 / W7 snippets above.
  U7 = ot({ opSnippet: z7, packedOpSnippet: W7, cpuKernelImpl: v$ }),
  IF = { kernelName: fo, backendName: "webgl", kernelFunc: U7 };

// Unpacked mirror-pad shader program: (e = input shape, t = paddings,
// o = mode). The mode "reflect" uses offset 0, any other mode offset 1.
// The constructor locals below are interpolated into the GLSL that follows.
var ag=class{constructor(e,t,o){this.variableNames=["x"],this.outputShape=t.map((c,l)=>c[0]+e[l]+c[1]);let n=e.length,s=_e(n),a=t.map(c=>c[0]).join(","),i=t.map((c,l)=>c[0]+e[l]).join(","),p=["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,n),u=o==="reflect"?0:1;if(n===1){this.userCode=`
int start = ${a};
int end = ${i};
void main() {
int outC = getOutputCoords();
if (outC < start) {
outC = start * 2 - outC - ${u};
} else if(outC >= end) {
outC = (end - 1) * 2 - outC + ${u};
}
setOutput(getX(outC - start));
}
`;return}this.userCode=`
${s} start = ${s}(${a});
${s} end = ${s}(${i});
void main() {
${s} outC = getOutputCoords();
for (int i = 0; i < ${n}; i++) {
if (outC[i] < start[i]) {
outC[i] = start[i] * 2 - outC[i] - ${u};
} else if(outC[i] >= end[i]) {
outC[i] = (end[i] - 1) * 2 - outC[i] + ${u};
}
}
${s} coords = outC - start;
setOutput(getX(${p}));
}
`}};/* ig: packed-texture variant of the mirror-pad shader program (e = input shape, t = paddings, o = mode; "reflect" uses offset 0, otherwise 1). G7 selects it when WEBGL_PACK_ARRAY_OPERATIONS is enabled. The constructor locals are interpolated into the GLSL that follows. */var ig=class{constructor(e,t,o){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=t.map((d,h)=>d[0]+e[h]+d[1]);let n=e.length,s=_e(n),a=t.map(d=>d[0]).join(","),i=t.map((d,h)=>d[0]+e[h]).join(","),p=$t("rc",n),u=$t("source",n),c=`${p[n-1]} < ${this.outputShape[n-1]}`,l=n===1?"source":`vec2(${u.slice(-2).join()})`,m=o==="reflect"?0:1,f="";if(n===1){let d=`
${s} source = rc;
if (source < start) {
source = start * 2 - source - ${m};
} else if (source >= end) {
source = (end - 1) * 2 - source + ${m};
}
source -= start;
`;f=`
${s} rc = outputLoc;
${d}
result[0] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
${d}
result[1] = getChannel(getX(${u.join()}), ${l});
}
`}else{let d=`
${s} source = rc;
${s} lt = ${s}(lessThan(source, start));
${s} gte = ${s}(greaterThanEqual(source, end));
${s} orig = 1 - (lt + gte);
source = orig * source +
lt * (start * 2 - source - ${m}) +
gte * ((end - 1) * 2 - source + ${m});
source -= start;
`;f=`
${s} rc = outputLoc;
${d}
result[0] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
${d}
result[1] = getChannel(getX(${u.join()}), ${l});
}
rc = outputLoc;
${p[n-2]} += 1;
if(${p[n-2]} < ${this.outputShape[n-2]}) {
${d}
result[2] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
${d}
result[3] = getChannel(getX(${u.join()}), ${l});
}
}
`}this.userCode=`
const ${s} start = ${s}(${a});
const ${s} end = ${s}(${i});
void main() {
${s} outputLoc = getOutputCoords();
vec4 result = vec4(0.);
${f}
setOutput(result);
}
`}};var G7=({inputs:r,backend:e,attrs:t})=>{let{x:o}=r,{paddings:n,mode:s}=t,a=P().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new ig(o.shape,n,s):new ag(o.shape,n,s);return e.runWebGLProgram(a,[o],o.dtype)},wF={kernelName:Dn,backendName:"webgl",kernelFunc:G7};var H7=`if (b == 0.0) return NAN;
return mod(a, b);`,q7=`
vec4 result = mod(a, b);
bvec4 isNaN = equal(b, vec4(0.0));
`+js+`
return result;
`,
  // Element-wise mod kernel built from the H7 / q7 snippets above
  // (a zero divisor yields NaN).
  K7 = ot({ opSnippet: H7, packedOpSnippet: q7 }),
  SF = { kernelName: ji, backendName: "webgl", kernelFunc: K7 };

// Sampling shader program over "probs" with a float "seed" uniform; output
// shape is [e, o]. Y7 constructs it as (batchSize, numOutcomes, numSamples).
// t is interpolated into the GLSL that follows.
var ug=class{constructor(e,t,o){this.variableNames=["probs"],this.customUniforms=[{name:"seed",type:"float"}],this.outputShape=[e,o],this.userCode=`
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
float r = random(seed);
float cdf = 0.0;
for (int i = 0; i < ${t-1}; i++) {
cdf += getProbs(batch, i);
if (r < cdf) {
setOutput(float(i));
return;
}
}
// If no other event happened, last event happened.
setOutput(float(${t-1}));
}
`}};var j7=`
if (a == b) {
return 1.0;
};
return a / b;`,X7=`
// vec4 one = vec4(equal(a, b));
// return one + (vec4(1.0) - one) * a / b;
vec4 result = a / b;
if(a.x == b.x) {
result.x = 1.;
}
if(a.y == b.y) {
result.y = 1.;
}
if(a.z == b.z) {
result.z = 1.;
}
if(a.w == b.w) {
result.w = 1.;
}
return result;
`,
  // Element-wise division kernel built from the j7 / X7 snippets above
  // (equal operands yield exactly 1).
  Vw = ot({ opSnippet: j7, packedOpSnippet: X7, checkOutOfBounds: true }),
  vF = { kernelName: Cn, backendName: "webgl", kernelFunc: Vw };

// Element-wise subtraction kernel; the same snippet serves both the unpacked
// and the packed program.
var kF = "return a - b;";
var zw = ot({ opSnippet: kF, packedOpSnippet: kF, supportsComplex: true, cpuKernelImpl: G$ });
var TF = { kernelName: Io, backendName: "webgl", kernelFunc: zw };

/**
 * Softmax-style kernel func over attrs.dim: subtracts the per-axis max (Bw)
 * from the logits, applies Ow, reduces with Ou along the same axis and divides
 * (Vw). All six intermediates are disposed before returning.
 * NOTE(review): Ow and Ou are defined outside this chunk; presumably exp and
 * sum - confirm.
 */
function Ww(r) {
  const { inputs, backend, attrs } = r;
  const { logits } = inputs;
  const { dim } = attrs;
  const axes = x.parseAxisParam([dim], logits.shape);

  const maxLogit = Bw({
    inputs: { x: logits },
    backend: backend,
    attrs: { reductionIndices: axes, keepDims: false }
  });
  const keepDimsShape = I.expandShapeToKeepDim(maxLogit.shape, axes);
  const maxLogitReshaped = J({ inputs: { x: maxLogit }, backend: backend, attrs: { shape: keepDimsShape } });
  const shifted = zw({ inputs: { a: logits, b: maxLogitReshaped }, backend: backend });
  const numerator = Ow({ inputs: { x: shifted }, backend: backend });
  const denominator = Ou({ inputs: { x: numerator }, backend: backend, attrs: { axis: axes, keepDims: false } });
  const denominatorReshaped = J({ inputs: { x: denominator }, backend: backend, attrs: { shape: keepDimsShape } });
  const result = Vw({ inputs: { a: numerator, b: denominatorReshaped }, backend: backend });

  backend.disposeIntermediateTensorInfo(maxLogit);
  backend.disposeIntermediateTensorInfo(maxLogitReshaped);
  backend.disposeIntermediateTensorInfo(shifted);
  backend.disposeIntermediateTensorInfo(numerator);
  backend.disposeIntermediateTensorInfo(denominator);
  backend.disposeIntermediateTensorInfo(denominatorReshaped);
  return result;
}
var NF = { kernelName: Xn, backendName: "webgl", kernelFunc: Ww };

/**
 * Sampling kernel func (attrs: numSamples, seed, normalized). When the logits
 * are not already normalized they are passed through Ww over the last axis
 * first, and that intermediate is disposed afterwards. Output dtype is "int32".
 */
function Y7(r) {
  const { inputs, backend, attrs } = r;
  const { logits } = inputs;
  const { numSamples, seed, normalized } = attrs;
  const probs = normalized
    ? logits
    : Ww({ inputs: { logits: logits }, backend: backend, attrs: { dim: logits.shape.length - 1 } });
  const batchSize = probs.shape[0];
  const numOutcomes = probs.shape[1];
  const program = new ug(batchSize, numOutcomes, numSamples);
  const customValues = [[seed]];
  const samples = backend.runWebGLProgram(program, [probs], "int32", customValues);
  if (!normalized) {
    backend.disposeIntermediateTensorInfo(probs);
  }
  return samples;
}
var _F = { kernelName: kp, backendName: "webgl", kernelFunc: Y7 };
var Q7=Vt+`
return -x;
`,Z7=`
vec4 result = -x;
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`;
/**
 * Negation kernel func. Runs the CPU implementation T$ when the backend
 * prefers it; otherwise runs the packed (Z7) or unpacked (Q7) shader depending
 * on WEBGL_PACK_UNARY_OPERATIONS.
 */
function J7(r) {
  const { inputs, backend } = r;
  const { x: input } = inputs;
  if (backend.shouldExecuteOnCPU([input])) {
    const texEntry = backend.texData.get(input.dataId);
    const [negatedValues, outShape] = T$(texEntry.values, input.shape, input.dtype);
    return backend.makeTensorInfo(outShape, input.dtype, negatedValues);
  }
  const program = P().getBool("WEBGL_PACK_UNARY_OPERATIONS")
    ? new No(input.shape, Z7)
    : new fr(input.shape, Q7);
  return backend.runWebGLProgram(program, [input], input.dtype);
}
var EF = { kernelName: Pn, backendName: "webgl", kernelFunc: J7 };

var eZ = Bt.nonMaxSuppressionV3Impl;
/**
 * Non-max-suppression (V3) kernel func. Reads boxes and scores back
 * synchronously and delegates to the shared implementation; returns the
 * selected indices as an int32 tensor.
 */
function tZ(r) {
  I.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");
  const { inputs, backend, attrs } = r;
  const { boxes, scores } = inputs;
  const { maxOutputSize, iouThreshold, scoreThreshold } = attrs;
  const boxesVals = backend.readSync(boxes.dataId);
  const scoresVals = backend.readSync(scores.dataId);
  const { selectedIndices } = eZ(boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold);
  return backend.makeTensorInfo([selectedIndices.length], "int32", new Int32Array(selectedIndices));
}
var $F = { kernelName: On, backendName: "webgl", kernelFunc: tZ };

var rZ = Bt.nonMaxSuppressionV4Impl;
/**
 * Non-max-suppression (V4) kernel func: like tZ, with padToMaxOutputSize.
 * Returns [selectedIndices, validOutputs (int32 scalar)].
 */
function oZ(r) {
  I.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");
  const { inputs, backend, attrs } = r;
  const { boxes, scores } = inputs;
  const { maxOutputSize, iouThreshold, scoreThreshold, padToMaxOutputSize } = attrs;
  const boxesVals = backend.readSync(boxes.dataId);
  const scoresVals = backend.readSync(scores.dataId);
  const { selectedIndices, validOutputs } = rZ(
    boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold, padToMaxOutputSize
  );
  return [
    backend.makeTensorInfo([selectedIndices.length], "int32", new Int32Array(selectedIndices)),
    backend.makeTensorInfo([], "int32", new Int32Array([validOutputs]))
  ];
}
var RF = { kernelName: pa, backendName: "webgl", kernelFunc: oZ };

var nZ = Bt.nonMaxSuppressionV5Impl;
/**
 * Non-max-suppression (V5) kernel func: like tZ, with softNmsSigma.
 * Returns [selectedIndices (int32), selectedScores (float32)].
 */
function sZ(r) {
  // This double-quoted literal must stay on a single line: it was previously
  // split by a raw line break, which is a syntax error in JavaScript.
  I.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");
  const { inputs, backend, attrs } = r;
  const { boxes, scores } = inputs;
  const { maxOutputSize, iouThreshold, scoreThreshold, softNmsSigma } = attrs;
  const boxesVals = backend.readSync(boxes.dataId);
  const scoresVals = backend.readSync(scores.dataId);
  const { selectedIndices, selectedScores } = nZ(
    boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold, softNmsSigma
  );
  return [
    backend.makeTensorInfo([selectedIndices.length], "int32", new Int32Array(selectedIndices)),
    backend.makeTensorInfo([selectedScores.length], "float32", new Float32Array(selectedScores))
  ];
}
var AF = { kernelName: Mn, backendName: "webgl", kernelFunc: sZ };

// One-hot shader program over "indices"; output shape is [e, t]. aZ
// constructs it as (numIndices, depth, onValue, offValue). o and n are
// interpolated into the GLSL that follows.
var pg=class{constructor(e,t,o,n){this.variableNames=["indices"],this.outputShape=[e,t],this.userCode=`
void main() {
ivec2 coords = getOutputCoords();
int index = round(getIndices(coords.x));
setOutput(mix(float(${n}), float(${o}),
float(index == coords.y)));
}
`}};
/**
 * One-hot kernel func (attrs: dtype, depth, onValue, offValue). Flattens the
 * indices, runs pg to produce a [numIndices, depth] tensor, then reshapes to
 * [...indices.shape, depth]. Both intermediates are disposed.
 */
var aZ = (r) => {
  const { inputs, backend, attrs } = r;
  const { indices } = inputs;
  const { dtype, depth, onValue, offValue } = attrs;
  const numIndices = x.sizeFromShape(indices.shape);
  const program = new pg(numIndices, depth, onValue, offValue);
  const flatIndices = J({ inputs: { x: indices }, backend: backend, attrs: { shape: [numIndices] } });
  const flatOneHot = backend.runWebGLProgram(program, [flatIndices], dtype);
  backend.disposeIntermediateTensorInfo(flatIndices);
  const outShape = [...indices.shape, depth];
  const result = J({ inputs: { x: flatOneHot }, backend: backend, attrs: { shape: outShape } });
  backend.disposeIntermediateTensorInfo(flatOneHot);
  return result;
};
var FF = { kernelName: ca, backendName: "webgl", kernelFunc: aZ };

/**
 * Zeros-like kernel func. For complex64 the real and imaginary parts are
 * zeroed recursively and recombined; otherwise a tensor of the same shape and
 * dtype is filled with 0 ("" for strings).
 */
function Bl(r) {
  const { inputs, backend } = r;
  const { x: tensor } = inputs;
  if (tensor.dtype !== "complex64") {
    return Ba({
      attrs: {
        shape: tensor.shape,
        dtype: tensor.dtype,
        value: tensor.dtype === "string" ? "" : 0
      },
      backend: backend
    });
  }
  const realPart = La({ inputs: { input: tensor }, backend: backend });
  const zeroReal = Bl({ inputs: { x: realPart }, backend: backend });
  const imagPart = Lu({ inputs: { input: tensor }, backend: backend });
  const zeroImag = Bl({ inputs: { x: imagPart }, backend: backend });
  const result = Ar({ inputs: { real: zeroReal, imag: zeroImag }, backend: backend });
  backend.disposeIntermediateTensorInfo(realPart);
  backend.disposeIntermediateTensorInfo(zeroReal);
  backend.disposeIntermediateTensorInfo(imagPart);
  backend.disposeIntermediateTensorInfo(zeroImag);
  return result;
}
var DF = { kernelName: Es, backendName: "webgl", kernelFunc: Bl };

/**
 * Ones-like kernel func. Throws for string tensors. For complex64 the real
 * part becomes ones (recursively) and the imaginary part zeros (via Bl).
 */
function PF(r) {
  const { inputs, backend } = r;
  const { x: tensor } = inputs;
  if (tensor.dtype === "string") {
    throw new Error("onesLike is not supported under string dtype");
  }
  if (tensor.dtype !== "complex64") {
    return Ba({
      attrs: { shape: tensor.shape, dtype: tensor.dtype, value: 1 },
      backend: backend
    });
  }
  const realPart = La({ inputs: { input: tensor }, backend: backend });
  const onesReal = PF({ inputs: { x: realPart }, backend: backend });
  const imagPart = Lu({ inputs: { input: tensor }, backend: backend });
  const zeroImag = Bl({ inputs: { x: imagPart }, backend: backend });
  const result = Ar({ inputs: { real: onesReal, imag: zeroImag }, backend: backend });
  backend.disposeIntermediateTensorInfo(realPart);
  backend.disposeIntermediateTensorInfo(onesReal);
  backend.disposeIntermediateTensorInfo(imagPart);
  backend.disposeIntermediateTensorInfo(zeroImag);
  return result;
}
var OF = { kernelName: Cs, backendName: "webgl", kernelFunc: PF };

/**
 * Stack kernel func (inputs is an array of tensors, attrs.axis). A single
 * input is just passed through Kh at the axis. Otherwise every input is
 * checked for matching shape and dtype, passed through Kh, combined with Pw
 * along the axis, and the per-input intermediates are disposed.
 */
function iZ(r) {
  const { inputs, backend, attrs } = r;
  const { axis } = attrs;
  if (inputs.length === 1) {
    return Kh({ inputs: { input: inputs[0] }, backend: backend, attrs: { dim: axis } });
  }
  const firstShape = inputs[0].shape;
  const firstDtype = inputs[0].dtype;
  inputs.forEach((tensor) => {
    x.assertShapesMatch(firstShape, tensor.shape, "All tensors passed to stack must have matching shapes");
    x.assert(firstDtype === tensor.dtype, () => "All tensors passed to stack must have matching dtypes");
  });
  const intermediates = [];
  const expanded = inputs.map((tensor) => {
    const withNewAxis = Kh({ inputs: { input: tensor }, backend: backend, attrs: { dim: axis } });
    intermediates.push(withNewAxis);
    return withNewAxis;
  });
  const stacked = Pw({ inputs: expanded, backend: backend, attrs: { axis: axis } });
  intermediates.forEach((tensor) => backend.disposeIntermediateTensorInfo(tensor));
  return stacked;
}
var MF = { kernelName: Is, backendName: "webgl", kernelFunc: iZ };

// Unpacked constant-pad shader program: (e = input shape, t = paddings); the
// fill value arrives through the float "value" uniform. The constructor
// locals below are interpolated into the GLSL that follows.
var cg=class{constructor(e,t,o){this.variableNames=["x"],this.customUniforms=[{name:"value",type:"float"}],this.outputShape=t.map((u,c)=>u[0]+e[c]+u[1]);let n=e.length,s=_e(n),a=t.map(u=>u[0]).join(","),i=t.map((u,c)=>u[0]+e[c]).join(","),p=["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,n);if(n===1){this.userCode=`
int start = ${a};
int end = ${i};
void main() {
int outC = getOutputCoords();
if (outC < start || outC >= end) {
setOutput(value);
} else {
setOutput(getX(outC - start));
}
}
`;return}this.userCode=`
${s} start = ${s}(${a});
${s} end = ${s}(${i});
void main() {
${s} outC = getOutputCoords();
if (any(lessThan(outC, start)) || any(greaterThanEqual(outC, end))) {
setOutput(value);
} else {
${s} coords = outC - start;
setOutput(getX(${p}));
}
}
`}};/* lg: packed-texture variant of the constant-pad shader program (e = input shape, t = paddings; fill value supplied through the float "value" uniform). Uw selects it when WEBGL_PACK_ARRAY_OPERATIONS is enabled. m holds the per-channel coordinate-advance snippets emitted before each of the 2 (rank 1) or 4 result channels. */var lg=class{constructor(e,t,o){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"value",type:"float"}],this.outputShape=t.map((h,g)=>h[0]+e[g]+h[1]);let n=e.length,s=_e(n),a=t.map(h=>h[0]).join(","),i=t.map((h,g)=>h[0]+e[g]).join(","),p=$t("rc",n),u=$t("source",n),c=`${p[n-1]} < ${this.outputShape[n-1]}`,l=n===1?"source":`vec2(${u.slice(-2).join()})`,m=[`${s} rc = outputLoc;`,`${p[n-1]} += 1;
if(${c}) {
`,n===1?"":`}
rc = outputLoc;
${p[n-2]} += 1;
if(${p[n-2]} < ${this.outputShape[n-2]}) {`,n===1?"":` ${p[n-1]} += 1;
if(${c}) {`],f=n===1?"rc < start || rc >= end":"any(lessThan(rc, start)) || any(greaterThanEqual(rc, end))",d="";for(let h=0,g=n===1?2:4;h<g;h++)d+=`
${m[h]}
if (${f}) {
result[${h}] = float(value);
} else {
${s} source = rc - start;
result[${h}] = getChannel(getX(${u.join()}), ${l});
}
`;d+=n===1?"} ":"}}",this.userCode=`
const ${s} start = ${s}(${a});
const ${s} end = ${s}(${i});
void main() {
${s} outputLoc = getOutputCoords();
vec4 result = vec4(0.);
${d}
setOutput(result);
}
`}};
/**
 * Constant-pad kernel func (attrs: paddings, constantValue). An empty input
 * short-circuits to a filled tensor of the padded shape; otherwise the packed
 * (lg) or unpacked (cg) program is run with constantValue as its uniform.
 */
var Uw = (r) => {
  const { inputs, backend, attrs } = r;
  const { x: input } = inputs;
  const { paddings, constantValue } = attrs;
  if (x.sizeFromShape(input.shape) === 0) {
    const paddedShape = paddings.map((pair, dim) => pair[0] + input.shape[dim] + pair[1]);
    return Ba({
      backend: backend,
      attrs: { shape: paddedShape, value: constantValue, dtype: input.dtype }
    });
  }
  const program = P().getBool("WEBGL_PACK_ARRAY_OPERATIONS")
    ? new lg(input.shape, paddings, constantValue)
    : new cg(input.shape, paddings, constantValue);
  const customValues = [[constantValue]];
  return backend.runWebGLProgram(program, [input], input.dtype, customValues);
};
var LF = { kernelName: Ln, backendName: "webgl", kernelFunc: Uw };
var uZ=`
if(a < 0.0 && floor(b) < b){
return NAN;
}
if (b == 0.0) {
return 1.0;
}
return (round(mod(b, 2.0)) != 1) ?
pow(abs(a), b) : sign(a) * pow(abs(a), b);
`,pZ=`
// isModRound1 has 1 for components with round(mod(b, 2.0)) == 1, 0 otherwise.
vec4 isModRound1 = vec4(equal(round(mod(b, 2.0)), ivec4(1)));
vec4 multiplier = sign(a) * isModRound1 + (vec4(1.0) - isModRound1);
vec4 result = multiplier * pow(abs(a), b);
// Ensure that a^0 = 1, including 0^0 = 1 as this correspond to TF and JS
bvec4 isExpZero = equal(b, vec4(0.0));
result.r = isExpZero.r ? 1.0 : result.r;
result.g = isExpZero.g ? 1.0 : result.g;
result.b = isExpZero.b ? 1.0 : result.b;
result.a = isExpZero.a ? 1.0 : result.a;
bvec4 isNaN1 = lessThan(a, vec4(0.0));
bvec4 isNaN2 = lessThan(floor(b), b);
bvec4 isNaN = bvec4(isNaN1.x && isNaN2.x, isNaN1.y && isNaN2.y, isNaN1.z && isNaN2.z, isNaN1.w && isNaN2.w);
`+js+`
return result;
`,
  // Element-wise pow kernel built from the uZ / pZ snippets above.
  cZ = ot({ opSnippet: uZ, packedOpSnippet: pZ }),
  BF = { kernelName: Bn, backendName: "webgl", kernelFunc: cZ };

/**
 * Product-reduction kernel func (attrs: axis, keepDims). Transposes when the
 * reduced axes are not inner-most, reduces on the CPU (_$) or on the GPU
 * ("prod" via qr), optionally re-expands for keepDims, and disposes every
 * intermediate it created.
 */
function lZ(r) {
  const { inputs, backend, attrs } = r;
  const { x: input } = inputs;
  const { axis, keepDims } = attrs;
  const rank = input.shape.length;
  const toDispose = [];
  const origAxes = x.parseAxisParam(axis, input.shape);
  let axes = origAxes;
  const perm = I.getAxesPermutation(axes, rank);
  let permuted = input;
  if (perm != null) {
    permuted = xt({ inputs: { x: input }, backend: backend, attrs: { perm: perm } });
    axes = I.getInnerMostAxes(axes.length, rank);
    toDispose.push(permuted);
  }
  I.assertAxesAreInnerMostDims("prod", axes, rank);

  let result;
  if (backend.shouldExecuteOnCPU([permuted])) {
    const values = backend.texData.get(permuted.dataId).values;
    const { outVals, outShape, outDtype } = _$(permuted.shape, permuted.dtype, values, axes);
    result = backend.makeTensorInfo(outShape, outDtype, outVals);
  } else {
    const [outShape, reduceShape] = I.computeOutAndReduceShapes(permuted.shape, axes);
    const reduceSize = x.sizeFromShape(reduceShape);
    const flattened = J({ inputs: { x: permuted }, backend: backend, attrs: { shape: [-1, reduceSize] } });
    const outDtype = Ca(input.dtype);
    const reduced = qr(flattened, outDtype, "prod", backend);
    result = J({ inputs: { x: reduced }, backend: backend, attrs: { shape: outShape } });
    toDispose.push(flattened);
    toDispose.push(reduced);
  }

  if (keepDims) {
    // The un-expanded result becomes an intermediate once it is reshaped.
    toDispose.push(result);
    const keepDimsShape = I.expandShapeToKeepDim(result.shape, origAxes);
    result = J({ inputs: { x: result }, backend: backend, attrs: { shape: keepDimsShape } });
  }
  toDispose.forEach((tensor) => backend.disposeIntermediateTensorInfo(tensor));
  return result;
}
var VF = { kernelName: Ao, backendName: "webgl", kernelFunc: lZ };

/**
 * Ragged-gather kernel func. Reads every input back synchronously, delegates
 * to E$ and returns the nested-split tensors (int32) followed by the dense
 * values tensor.
 */
function mZ(r) {
  const { inputs, backend, attrs } = r;
  const { paramsNestedSplits, paramsDenseValues, indices } = inputs;
  const { outputRaggedRank } = attrs;
  const splitsVals = paramsNestedSplits.map((split) => backend.readSync(split.dataId));
  const splitsShapes = paramsNestedSplits.map((split) => split.shape);
  const denseVals = backend.readSync(paramsDenseValues.dataId);
  const indicesVals = backend.readSync(indices.dataId);
  const [outSplits, outDenseVals, outDenseShape] = E$(
    splitsVals,
    splitsShapes,
    denseVals,
    paramsDenseValues.shape,
    paramsDenseValues.dtype,
    indicesVals,
    indices.shape,
    outputRaggedRank
  );
  const splitTensors = outSplits.map((split) => backend.makeTensorInfo([split.length], "int32", split));
  const denseTensor = backend.makeTensorInfo(outDenseShape, paramsDenseValues.dtype, outDenseVals);
  return splitTensors.concat([denseTensor]);
}
var zF = { kernelName: Tp, backendName: "webgl", kernelFunc: mZ };

/**
 * Ragged-range kernel func. Reads starts / limits / deltas synchronously,
 * delegates to $$ and returns [rowSplits (int32), values (starts.dtype)].
 */
function fZ(r) {
  const { inputs, backend } = r;
  const { starts, limits, deltas } = inputs;
  const startsVals = backend.readSync(starts.dataId);
  const limitsVals = backend.readSync(limits.dataId);
  const deltasVals = backend.readSync(deltas.dataId);
  const [rowSplits, denseValues] = $$(
    startsVals, starts.shape, starts.dtype, limitsVals, limits.shape, deltasVals, deltas.shape
  );
  const rowSplitsTensor = backend.makeTensorInfo([rowSplits.length], "int32", rowSplits);
  const denseValuesTensor = backend.makeTensorInfo([denseValues.length], starts.dtype, denseValues);
  return [rowSplitsTensor, denseValuesTensor];
}
var WF = { kernelName: Np, backendName: "webgl", kernelFunc: fZ };

/**
 * Ragged-to-dense kernel func. Reads every input back synchronously,
 * delegates to R$ and wraps the resulting values in a tensor of values.dtype.
 */
function dZ(r) {
  const { inputs, backend, attrs } = r;
  const { shape, values, defaultValue, rowPartitionTensors } = inputs;
  const { rowPartitionTypes } = attrs;
  const shapeVals = backend.readSync(shape.dataId);
  const valuesVals = backend.readSync(values.dataId);
  const defaultVals = backend.readSync(defaultValue.dataId);
  const partitionVals = rowPartitionTensors.map((part) => backend.readSync(part.dataId));
  const partitionShapes = rowPartitionTensors.map((part) => part.shape);
  const [outShape, outVals] = R$(
    shapeVals,
    shape.shape,
    valuesVals,
    values.shape,
    values.dtype,
    defaultVals,
    defaultValue.shape,
    partitionVals,
    partitionShapes,
    rowPartitionTypes
  );
  return backend.makeTensorInfo(outShape, values.dtype, outVals);
}
var UF = { kernelName: _p, backendName: "webgl", kernelFunc: dZ };

/**
 * Range kernel func (attrs: start, stop, step, dtype). The values are
 * computed by A$ on the host and wrapped in a 1-D tensor.
 */
var Gw = (r) => {
  const { backend, attrs } = r;
  const { start, stop, step, dtype } = attrs;
  const values = A$(start, stop, step, dtype);
  return backend.makeTensorInfo([values.length], dtype, values);
};
var GF = { kernelName: ws, backendName: "webgl", kernelFunc: Gw };

// Element-wise reciprocal kernel.
var hZ = "return 1.0 / x;";
var gZ = he({ opSnippet: hZ });
var HF = { kernelName: ma, backendName: "webgl", kernelFunc: gZ };
var xZ=Vt+`
return (x < 0.0) ? 0.0 : x;
`,yZ=`
vec4 result = x * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,
  // Unary kernel built from the xZ / yZ snippets above (negative inputs map
  // to 0; the packed snippet passes NaN through).
  bZ = he({ opSnippet: xZ, packedOpSnippet: yZ }),
  qF = { kernelName: zn, backendName: "webgl", kernelFunc: bZ };
var CZ=Vt+`
return (x < 0.0) ? 0.0 : min(6.0, x);
`,IZ=`
vec4 result = min(x, vec4(6.)) * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,
  // Unary kernel built from the CZ / IZ snippets above (negative inputs map
  // to 0, everything else is capped at 6; the packed snippet passes NaN
  // through).
  wZ = he({ opSnippet: CZ, packedOpSnippet: IZ }),
  KF = { kernelName: Gn, backendName: "webgl", kernelFunc: wZ };

// Unpacked bilinear-resize shader program: (e = input shape [batch, height,
// width, depth], t = new height, o = new width, n = alignCorners,
// s = halfPixelCenters), as constructed by SZ. The constructor locals below
// are interpolated into the GLSL that follows.
var mg=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m;s?m="(vec2(yRC) + vec2(0.5)) * effectiveInputOverOutputRatioRC - vec2(0.5)":m="vec2(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
const vec2 effectiveInputOverOutputRatioRC = vec2(
${c[0]/l[0]},
${c[1]/l[1]});
const vec2 inputShapeRC = vec2(${i}.0, ${p}.0);
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 yRC = coords.yz;
// Fractional source index.
vec2 sourceFracIndexRC = ${m};
// Compute the four integer indices.
ivec2 sourceFloorRC = ivec2(max(sourceFracIndexRC, vec2(0.0)));
ivec2 sourceCeilRC = ivec2(
min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)));
float topLeft = getA(b, sourceFloorRC.x, sourceFloorRC.y, d);
float bottomLeft = getA(b, sourceCeilRC.x, sourceFloorRC.y, d);
float topRight = getA(b, sourceFloorRC.x, sourceCeilRC.y, d);
float bottomRight = getA(b, sourceCeilRC.x, sourceCeilRC.y, d);
vec2 fracRC = sourceFracIndexRC - vec2(sourceFloorRC);
float top = topLeft + (topRight - topLeft) * fracRC.y;
float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y;
float newValue = top + (bottom - top) * fracRC.x;
setOutput(newValue);
}
`}};var fg=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m;s?m="(vec3(yRC) + vec3(0.5)) * effectiveInputOverOutputRatioRC - vec3(0.5)":m="vec3(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
const vec3 effectiveInputOverOutputRatioRC = vec3(
${c[0]/l[0]},
${c[1]/l[1]},
${c[1]/l[1]});
const vec3 inputShapeRC = vec3(${i}.0, ${p}.0,
${p}.0);
float getAValue(int b, int r, int c, int d) {
return getChannel(getA(b, r, c, d), vec2(c, d));
}
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
// Calculate values for next column in yRC.z.
ivec3 yRC = coords.yzz + ivec3(0, 0, 1);
// Fractional source index.
vec3 sourceFracIndexRC = ${m};
// Compute the four integer indices.
ivec3 sourceFloorRC = ivec3(max(sourceFracIndexRC, vec3(0.0)));
ivec3 sourceCeilRC = ivec3(
min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)));
// Should we calculate next column and row elements in 2x2 packed cell.
bool hasNextCol = d < ${u-1};
bool hasNextRow = coords.z < ${o-1};
// In parallel, construct four corners for all four components in
// packed 2x2 cell.
vec4 topLeft = vec4(
getAValue(b, sourceFloorRC.x, sourceFloorRC.y, d),
hasNextCol ? getAValue(b, sourceFloorRC.x, sourceFloorRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceFloorRC.x, sourceFloorRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceFloorRC.x, sourceFloorRC.z, d + 1) : 0.0);
vec4 bottomLeft = vec4(
getAValue(b, sourceCeilRC.x, sourceFloorRC.y, d),
hasNextCol ? getAValue(b, sourceCeilRC.x, sourceFloorRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceCeilRC.x, sourceFloorRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceCeilRC.x, sourceFloorRC.z, d + 1) : 0.0);
vec4 topRight = vec4(
getAValue(b, sourceFloorRC.x, sourceCeilRC.y, d),
hasNextCol ? getAValue(b, sourceFloorRC.x, sourceCeilRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceFloorRC.x, sourceCeilRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceFloorRC.x, sourceCeilRC.z, d + 1) : 0.0);
vec4 bottomRight = vec4(
getAValue(b, sourceCeilRC.x, sourceCeilRC.y, d),
hasNextCol ? getAValue(b, sourceCeilRC.x, sourceCeilRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceCeilRC.x, sourceCeilRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceCeilRC.x, sourceCeilRC.z, d + 1) : 0.0);
vec3 fracRC = sourceFracIndexRC - vec3(sourceFloorRC);
vec4 top = mix(topLeft, topRight, fracRC.yyzz);
vec4 bottom = mix(bottomLeft, bottomRight, fracRC.yyzz);
vec4 newValue = mix(top, bottom, fracRC.x);
setOutput(newValue);
}
`}};
/**
 * Bilinear-resize kernel func (attrs: alignCorners, halfPixelCenters, size).
 * Uses the packed program fg when WEBGL_PACK_IMAGE_OPERATIONS is enabled,
 * otherwise mg. The output dtype is always "float32".
 */
function SZ(r) {
  const { inputs, backend, attrs } = r;
  const { images } = inputs;
  const { alignCorners, halfPixelCenters, size } = attrs;
  const [newHeight, newWidth] = size;
  const program = P().getBool("WEBGL_PACK_IMAGE_OPERATIONS")
    ? new fg(images.shape, newHeight, newWidth, alignCorners, halfPixelCenters)
    : new mg(images.shape, newHeight, newWidth, alignCorners, halfPixelCenters);
  return backend.runWebGLProgram(program, [images], "float32");
}
var jF = { kernelName: Un, backendName: "webgl", kernelFunc: SZ };

// Bilinear-resize gradient shader program: (e = dy shape, t = original image
// shape, o = alignCorners), as constructed by vZ; the output has shape t.
// The constructor locals below are interpolated into the GLSL that follows.
var dg=class{constructor(e,t,o){this.variableNames=["dy"],this.outputShape=[],this.outputShape=t;let[,n,s]=t,[,a,i]=e,p=[o&&a>1?n-1:n,o&&i>1?s-1:s],u=[o&&a>1?a-1:a,o&&i>1?i-1:i],c=p[0]/u[0],l=p[1]/u[1],m=1/c,f=1/l,d=Math.ceil(m)*2+2,h=Math.ceil(f)*2+2;this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
int r = coords[1];
int c = coords[2];
float accumulator = 0.0;
const float heightScale = float(${c});
const float widthScale = float(${l});
const float invHeightScale = float(${m});
const float invWidthScale = float(${f});
const int winHeight = int(${d});
const int winWidth = int(${h});
// Compute bounds for where in dy we will look
float startRLerp = floor(float(r) * invHeightScale);
int startDyR = int(startRLerp - float(winHeight / 2));
float startCLerp = floor(float(c) * invWidthScale);
int startDyC = int(startCLerp - float(winWidth / 2));
// Loop over dy
for (int dyROffset = 0; dyROffset < winHeight; dyROffset++) {
int dyR = dyROffset + startDyR;
// Guard against the window exceeding the bounds of dy
if (dyR < 0 || dyR >= ${a}) {
continue;
}
for (int dyCOffset = 0; dyCOffset < winWidth; dyCOffset++) {
int dyC = dyCOffset + startDyC;
// Guard against the window exceeding the bounds of dy
if (dyC < 0 || dyC >= ${i}) {
continue;
}
float dxR = float(dyR) * heightScale;
int topDxRIndex = int(floor(dxR));
int bottomDxRIndex = int(min(ceil(dxR), ${n-1}.0));
float dxRLerp = dxR - float(topDxRIndex);
float inverseDxRLerp = 1.0 - dxRLerp;
float dxC = float(dyC) * widthScale;
int leftDxCIndex = int(floor(dxC));
int rightDxCIndex = int(min(ceil(dxC), ${s-1}.0));
float dxCLerp = dxC - float(leftDxCIndex);
float inverseDxCLerp = 1.0 - dxCLerp;
if (r == topDxRIndex && c == leftDxCIndex) {
// topLeft
accumulator +=
getDy(b, dyR, dyC, d) * inverseDxRLerp * inverseDxCLerp;
}
if (r == topDxRIndex && c == rightDxCIndex) {
// topRight
accumulator += getDy(b, dyR, dyC, d) * inverseDxRLerp * dxCLerp;
}
if (r == bottomDxRIndex && c == leftDxCIndex) {
// bottomLeft
accumulator += getDy(b, dyR, dyC, d) * dxRLerp * inverseDxCLerp;
}
if (r == bottomDxRIndex && c == rightDxCIndex) {
// bottomRight
accumulator += getDy(b, dyR, dyC, d) * dxRLerp * dxCLerp;
}
}
}
// End loop over dy
setOutput(accumulator);
}
`}};
/**
 * Bilinear-resize gradient kernel func (inputs: images, dy; attrs:
 * alignCorners). Runs dg over dy and returns a tensor shaped like images.
 */
function vZ(r) {
  const { inputs, backend, attrs } = r;
  const { images, dy } = inputs;
  const { alignCorners } = attrs;
  const program = new dg(dy.shape, images.shape, alignCorners);
  return backend.runWebGLProgram(program, [dy], dy.dtype);
}
var XF = { kernelName: Vm, backendName: "webgl", kernelFunc: vZ };

// Unpacked nearest-neighbour-resize shader program: (e = input shape,
// t = new height, o = new width, n = alignCorners, s = halfPixelCenters), as
// constructed by kZ. The constructor locals below are interpolated into the
// GLSL that follows.
var hg=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m=n?"0.5":"0.0",f;s?f="max((vec2(yRC) + vec2(0.5)) * effectiveInputOverOutputRatioRC, vec2(0.0))":f="vec2(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
const vec2 effectiveInputOverOutputRatioRC = vec2(
${c[0]/l[0]},
${c[1]/l[1]});
const vec2 inputShapeRC = vec2(${i}.0, ${p}.0);
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 yRC = coords.yz;
// Fractional source index.
vec2 sourceFracIndexRC = ${f};
// Compute the coordinators of nearest neighbor point.
ivec2 sourceNearestRC = ivec2(
min(inputShapeRC - 1.0, floor(sourceFracIndexRC + ${m})));
float newValue = getA(b, sourceNearestRC.x, sourceNearestRC.y, d);
setOutput(newValue);
}
`}};var gg=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m=n?"0.5":"0.0",f;s?f="max((vec3(yRC) + vec3(0.5)) * effectiveInputOverOutputRatioRC, vec3(0.0))":f="vec3(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
const vec3 effectiveInputOverOutputRatioRC = vec3(
${c[0]/l[0]},
${c[1]/l[1]},
${c[1]/l[1]});
const vec3 inputShapeRC = vec3(${i}.0, ${p}.0,
${p}.0);
float getAValue(int b, int r, int c, int d) {
return getChannel(getA(b, r, c, d), vec2(c, d));
}
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
// Calculate values for next column in yRC.z.
ivec3 yRC = coords.yzz + ivec3(0, 0, 1);
// Fractional source index.
vec3 sourceFracIndexRC = ${f};
// Compute the coordinators of nearest neighbor point.
ivec3 sourceNearestRC = ivec3(
min(inputShapeRC - 1.0, floor(sourceFracIndexRC + ${m})));
// Should we calculate next column and row elements in 2x2 packed cell.
bool hasNextCol = d < ${u-1};
bool hasNextRow = coords.z < ${o-1};
vec4 newValue = vec4(
getAValue(b, sourceNearestRC.x, sourceNearestRC.y, d),
hasNextCol ? getAValue(b, sourceNearestRC.x, sourceNearestRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceNearestRC.x, sourceNearestRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceNearestRC.x, sourceNearestRC.z, d + 1) : 0.0);
setOutput(newValue);
}
`}};
/**
 * Nearest-neighbour-resize kernel func (attrs: alignCorners,
 * halfPixelCenters, size). Uses the packed program gg when
 * WEBGL_PACK_IMAGE_OPERATIONS is enabled, otherwise hg. The output keeps the
 * input dtype.
 */
function kZ(r) {
  const { inputs, backend, attrs } = r;
  const { images } = inputs;
  const { alignCorners, halfPixelCenters, size } = attrs;
  const [newHeight, newWidth] = size;
  const program = P().getBool("WEBGL_PACK_IMAGE_OPERATIONS")
    ? new gg(images.shape, newHeight, newWidth, alignCorners, halfPixelCenters)
    : new hg(images.shape, newHeight, newWidth, alignCorners, halfPixelCenters);
  return backend.runWebGLProgram(program, [images], images.dtype);
}
var YF = { kernelName: Wn, backendName: "webgl", kernelFunc: kZ };

// Nearest-neighbour-resize gradient shader program: (e = dy shape,
// t = original image shape, o = alignCorners), as constructed by TZ; the
// output has shape t. The constructor locals below are interpolated into the
// GLSL that follows.
var xg=class{constructor(e,t,o){this.variableNames=["dy"],this.outputShape=[],this.outputShape=t;let[,n,s]=t,[,a,i]=e,p=[o&&a>1?n-1:n,o&&i>1?s-1:s],u=[o&&a>1?a-1:a,o&&i>1?i-1:i],c=p[0]/u[0],l=p[1]/u[1],m=1/c,f=1/l,d=Math.ceil(m)*2+2,h=Math.ceil(f)*2+2;this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
int r = coords[1];
int c = coords[2];
float accumulator = 0.0;
const float heightScale = float(${c});
const float widthScale = float(${l});
const float invHeightScale = float(${m});
const float invWidthScale = float(${f});
const int winHeight = int(${d});
const int winWidth = int(${h});
// Compute bounds for where in dy we will look
float startRLerp = floor(float(r) * invHeightScale);
int startDyR = int(floor(startRLerp - float(winHeight / 2)));
float startCLerp = floor(float(c) * invWidthScale);
int startDyC = int(floor(startCLerp - float(winWidth / 2)));
// Loop over dy
for (int dyROffset = 0; dyROffset < winHeight; dyROffset++) {
int dyR = dyROffset + startDyR;
// Guard against the window exceeding the bounds of dy
if (dyR < 0 || dyR >= ${a}) {
continue;
}
for (int dyCOffset = 0; dyCOffset < winWidth; dyCOffset++) {
int dyC = dyCOffset + startDyC;
// Guard against the window exceeding the bounds of dy
if (dyC < 0 || dyC >= ${i}) {
continue;
}
float sourceFracRow =
float(${p[0]}) *
(float(dyR) / float(${u[0]}));
float sourceFracCol =
float(${p[1]}) *
(float(dyC) / float(${u[1]}));
int sourceNearestRow = int(min(
float(int(${n}) - 1),
${o} ? float(round(sourceFracRow)) :
float(floor(sourceFracRow))));
int sourceNearestCol = int(min(
float(int(${s}) - 1),
${o} ? float(round(sourceFracCol)) :
float(floor(sourceFracCol))));
if (r == sourceNearestRow && c == sourceNearestCol) {
accumulator += getDy(b, dyR, dyC, d);
}
}
}
// End loop over dy
setOutput(accumulator);
}
`}};function TZ(r){let{inputs:e,backend:t,attrs:o}=r,{images:n,dy:s}=e,{alignCorners:a}=o,i=new xg(s.shape,n.shape,a);return t.runWebGLProgram(i,[s],s.dtype)}var QF={kernelName:Bm,backendName:"webgl",kernelFunc:TZ};var yg=class{constructor(e,t){this.variableNames=["x"];let o=e.length;if(o>4)throw new Error(`WebGL backend: Reverse of rank-${o} tensor is not yet supported`);if(this.outputShape=e,o===1){this.userCode=`
void main() {
int coord = getOutputCoords();
setOutput(getX(${e[0]} - coord - 1));
}
`;return}let n=i=>t.indexOf(i)!==-1&&e[i]!==1?`${e[i]} - coords[${i}] - 1`:`coords[${i}]`,s=e.map((i,p)=>n(p)).join(","),a=_e(o);this.userCode=`
void main() {
${a} coords = getOutputCoords();
setOutput(getX(${s}));
}
`}};var bg=class{constructor(e,t){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0;let o=e.length;if(o>4)throw new Error(`WebGL backend: Reverse of rank-${o} tensor is not yet supported`);this.outputShape=e;let n=$t("rc",o),s=`${n[o-1]} + 1 < ${this.outputShape[o-1]}`,a=`${n[o-2]} + 1 < ${this.outputShape[o-2]}`,i=_e(o);o===1?this.userCode=`
void main(){
int rc = getOutputCoords();
vec4 result = vec4(0.);
result.r = getChannel(getX(${e[0]} - rc - 1),
${e[0]} - rc - 1);
if(${s}){
result.g = getChannel(getX(${e[0]} - (rc + 1) - 1),
${e[0]} - (rc + 1) - 1);
}
setOutput(result);
}
`:this.userCode=`
void main() {
${i} rc = getOutputCoords();
vec4 result = vec4(0.);
result.r = ${p(n.slice())};
if(${s}){
result.g = ${u(n.slice())};
}
if(${a}) {
result.b = ${c(n.slice())};
if(${s}) {
result.a = ${l(n.slice())};
}
}
setOutput(result);
}
`;function p(d){return m(d)}function u(d){return d[o-1]="("+d[o-1]+" + 1)",m(d)}function c(d){return d[o-2]="("+d[o-2]+" + 1)",m(d)}function l(d){return d[o-1]="("+d[o-1]+" + 1)",d[o-2]="("+d[o-2]+" + 1)",m(d)}function m(d){let h=e.map((b,C)=>f(C,d)),g=h.join(","),y=h.slice(-2).join(",");return`getChannel(getX(${g}), vec2(${y}))`}function f(d,h){return t.indexOf(d)!==-1&&e[d]!==1?`${e[d]} - ${h[d]} - 1`:`${h[d]}`}}};function NZ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{dims:s}=o,a=n.shape.length,i=x.parseAxisParam(s,n.shape);if(a===0)return Rt({inputs:{x:n},backend:t});let p=P().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new bg(n.shape,i):new yg(n.shape,i);return t.runWebGLProgram(p,[n],n.dtype)}var ZF={kernelName:fa,backendName:"webgl",kernelFunc:NZ};var Cg=class{constructor(e,t){this.variableNames=["Image"],this.outputShape=[],this.customUniforms=[{name:"params",type:"vec4"}];let o=e[1],n=e[2];this.outputShape=e;let s="";typeof t=="number"?s=`float outputValue = ${t.toFixed(2)};`:s=`
vec3 fill = vec3(${t.join(",")});
float outputValue = fill[coords[3]];`,this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int x = coords[2];
int y = coords[1];
float coordXFloat = (float(x) - params[0]) * params[3] -
(float(y) - params[1]) * params[2];
float coordYFloat = (float(x) - params[0]) * params[2] +
(float(y) - params[1]) * params[3];
int coordX = int(round(coordXFloat + params[0]));
int coordY = int(round(coordYFloat + params[1]));
${s}
if(coordX >= 0 && coordX < ${n} && coordY >= 0 && coordY < ${o}) {
outputValue = getImage(coords[0], coordY, coordX, coords[3]);
}
setOutput(outputValue);
}
`}};var JF={kernelName:es,backendName:"webgl",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{image:o}=r,{radians:n,fillValue:s,center:a}=e,i=t,p=new Cg(o.shape,s),[u,c]=I.getImageCenter(a,o.shape[1],o.shape[2]),l=[[u,c,Math.sin(n),Math.cos(n)]];return i.runWebGLProgram(p,[o],o.dtype,l)}};var _Z=`
// OpenGL ES does not support round function.
// The algorithm is based on banker's rounding.
float base = floor(x);
if ((x - base) < 0.5) {
return floor(x);
} else if ((x - base) > 0.5) {
return ceil(x);
} else {
if (mod(base, 2.0) == 0.0) {
return base;
} else {
return base + 1.0;
}
}
`,EZ=he({opSnippet:_Z}),eD={kernelName:da,backendName:"webgl",kernelFunc:EZ};var $Z="return inversesqrt(x);",RZ=he({opSnippet:$Z,cpuKernelImpl:F$}),tD={kernelName:xo,backendName:"webgl",kernelFunc:RZ};var vc=class{constructor(e,t,o,n,s,a,i=!0){this.variableNames=["updates","indices","defaultValue"],this.outputShape=a;let p=_e(s.length),u=_e(a.length),c="";o===1?c="i":o===2&&(c="i, j");let l=`getIndices(${c})`,m="";n===1?m="i":n===2&&(m="i, coords[1]");let f=`getUpdates(${m})`,d=t>1?"strides[j]":"strides";this.userCode=`
${p} strides = ${p}(${s});
void main() {
${u} coords = getOutputCoords();
float sum = 0.0;
bool found = false;
for (int i = 0; i < ${e}; i++) {
int flattenedIndex = 0;
for (int j = 0; j < ${t}; j++) {
int index = round(${l});
flattenedIndex += index * ${d};
}
if (flattenedIndex == coords[0]) {
sum += ${f};
found = true;
}
}
setOutput(mix(getDefaultValue(), sum, float(found)));
}
`}};function AZ(r){let{inputs:e,backend:t,attrs:o}=r,{indices:n,updates:s}=e,{shape:a}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=I.calculateShapes(s,n,a),m=[l/u,u];if(l===0)return t.makeTensorInfo(a,n.dtype);let f=J({inputs:{x:n},backend:t,attrs:{shape:[p,i]}}),d=J({inputs:{x:s},backend:t,attrs:{shape:[p,u]}}),h=t.makeTensorInfo([],"float32",new Float32Array([0])),g=new vc(p,i,f.shape.length,d.shape.length,c,m),y=t.runWebGLProgram(g,[d,f,h],d.dtype),b=J({inputs:{x:y},backend:t,attrs:{shape:a}});return t.disposeIntermediateTensorInfo(f),t.disposeIntermediateTensorInfo(d),t.disposeIntermediateTensorInfo(y),t.disposeIntermediateTensorInfo(h),b}var rD={kernelName:Hn,backendName:"webgl",kernelFunc:AZ};var Ig=class{constructor(e,t,o,n){this.variableNames=["sortedSequence","values"],this.customUniforms=[{name:"numInputs",type:"int"}],this.outputShape=[e,o];let s="while (left < right) {",a=`for (int i = 0; i < ${Math.ceil(Math.log2(t+1))}; ++i) { if (left >= right) break;`,i=P().getNumber("WEBGL_VERSION")===2?s:a,p=n==="left"?"<":"<=";this.userCode=`
int findBound(int batch, float value) {
int left = 0;
int right = numInputs;
int mid;
${i}
mid = (left + right) / 2;
if (getSortedSequence(batch, mid) ${p} value) {
left = mid + 1;
} else {
right = mid;
}
}
return right;
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int valueIndex = coords[1];
float value = getValues(batch, valueIndex);
setOutput(float(findBound(batch, value)));
}
`}};function FZ(r){let{inputs:e,backend:t,attrs:o}=r,{sortedSequence:n,values:s}=e,{side:a}=o,i=new Ig(n.shape[0],n.shape[1],s.shape[1],a),p=[[n.shape[1]]];return t.runWebGLProgram(i,[n,s],"int32",p)}var oD={kernelName:Ep,backendName:"webgl",kernelFunc:FZ};var wg=class{constructor(e,t,o){this.variableNames=["c","a","b"],this.outputShape=t;let n,s;if(o>4)throw Error(`Where for rank ${o} is not yet supported`);if(o===1)s="resRC",n="resRC";else{let i=["resRC.x","resRC.y","resRC.z","resRC.w"],p=[],u=[];for(let c=0;c<t.length;c++)u.push(`${i[c]}`),c<e&&p.push(`${i[c]}`);n=p.join(),s=u.join()}let a=_e(o);this.userCode=`
void main() {
${a} resRC = getOutputCoords();
float cVal = getC(${n});
if (cVal >= 1.0) {
setOutput(getA(${s}));
} else {
setOutput(getB(${s}));
}
}
`}};function DZ(r){let{inputs:e,backend:t}=r,{condition:o,t:n,e:s}=e,a=new wg(o.shape.length,n.shape,n.shape.length);return t.runWebGLProgram(a,[o,n,s],ct(n.dtype,s.dtype))}var nD={kernelName:vs,backendName:"webgl",kernelFunc:DZ};var PZ=`
// Stable and Attracting Fixed Point (0, 1) for Normalized Weights.
// see: https://arxiv.org/abs/1706.02515
float scaleAlpha = ${I.SELU_SCALEALPHA};
float scale = ${I.SELU_SCALE};
return (x >= 0.0) ? scale * x : scaleAlpha * (exp(x) - 1.0);
`,OZ=he({opSnippet:PZ}),sD={kernelName:Xi,backendName:"webgl",kernelFunc:OZ};var MZ=jo+`
return 1.0 / (1.0 + exp(-1.0 * x));
`,LZ=`
vec4 result = 1.0 / (1.0 + exp(-1.0 * x));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
`,BZ=he({opSnippet:MZ,packedOpSnippet:LZ,cpuKernelImpl:P$}),aD={kernelName:yo,backendName:"webgl",kernelFunc:BZ};var VZ=`
if (isnan(x)) { return 0.0; }
return sign(x);
`,zZ=he({opSnippet:VZ}),iD={kernelName:Yi,backendName:"webgl",kernelFunc:zZ};var WZ=jo+`
return sin(x);
`,UZ=he({opSnippet:WZ}),uD={kernelName:Kn,backendName:"webgl",kernelFunc:UZ};var GZ=`
float e2x = exp(x);
return (e2x - 1.0 / e2x) / 2.0;
`,HZ=he({opSnippet:GZ}),pD={kernelName:ha,backendName:"webgl",kernelFunc:HZ};var qZ=`
float epsilon = 1.1920928955078125e-7;
float threshold = log(epsilon) + 2.0;
bool too_large = x > -threshold;
bool too_small = x < threshold;
float result;
float exp_x = exp(x);
if (too_large){
result = x;
}
else if (too_small){
result = exp_x;
}
else{
result = log(exp_x + 1.0);
}
return result;
`,KZ=he({opSnippet:qZ}),cD={kernelName:Qi,backendName:"webgl",kernelFunc:KZ};var jZ=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockShape:s,paddings:a}=o;x.assert(n.shape.length<=4,()=>"spaceToBatchND for rank > 4 with a WebGL backend not implemented yet");let i=s.reduce((y,b)=>y*b),p=[[0,0]];p.push(...a);for(let y=1+s.length;y<n.shape.length;++y)p.push([0,0]);let u=[],c=Uw({inputs:{x:n},backend:t,attrs:{paddings:p,constantValue:0}}),l=I.getReshaped(c.shape,s,i,!1),m=I.getPermuted(l.length,s.length,!1),f=I.getReshapedPermuted(c.shape,s,i,!1),d=J({inputs:{x:c},backend:t,attrs:{shape:l}}),h=xt({inputs:{x:d},backend:t,attrs:{perm:m}}),g=J({inputs:{x:h},backend:t,attrs:{shape:f}});return u.push(c),u.push(d),u.push(h),u.forEach(y=>t.disposeIntermediateTensorInfo(y)),g},lD={kernelName:ks,backendName:"webgl",kernelFunc:jZ};function XZ(r){let{inputs:e,backend:t}=r,{indices:o,values:n,denseShape:s,defaultValue:a}=e;if(s.shape.length!==1)throw new Error(`Dense shape must be a vector, saw:
${s.shape}`);if(o.shape.length!==2)throw new Error(`Indices must be a matrix, saw:
${o.shape}`);if(n.shape.length!==1)throw new Error(`Values must be a vector, saw:
${n.shape}`);if(a.shape.length!==0)throw new Error(`Default value must be a scalar, saw:
${a.shape}`);let i=t.readSync(o.dataId),p=t.readSync(n.dataId),u=t.readSync(s.dataId),c=t.readSync(a.dataId)[0],[l,m,f,d,h]=M$(i,o.shape,o.dtype,p,n.dtype,u,c);return[t.makeTensorInfo(m,o.dtype,l),t.makeTensorInfo([m[0]],n.dtype,f),t.makeTensorInfo([d.length],"bool",new Uint8Array(d.map(g=>Number(g)))),t.makeTensorInfo([h.length],o.dtype,new Int32Array(h))]}var mD={kernelName:Qa,backendName:"webgl",kernelFunc:XZ};function YZ(r){let{inputs:e,backend:t}=r,{inputIndices:o,inputShape:n,newShape:s}=e;if(o.shape.length!==2)throw new Error(`Input indices should be a matrix but received shape ${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape ${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=Array.from(t.readSync(n.dataId)),i=t.readSync(o.dataId),p=Array.from(t.readSync(s.dataId)),[u,c,l]=L$(i,o.shape,o.dtype,a,p);return[t.makeTensorInfo(c,o.dtype,u),t.makeTensorInfo([l.length],s.dtype,new Int32Array(l))]}var fD={kernelName:ga,backendName:"webgl",kernelFunc:YZ};function QZ(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);let a=t.readSync(o.dataId),i=t.readSync(n.dataId),p=t.readSync(s.dataId),[u,c]=sh(a,o.shape,o.dtype,i,p,!0);return t.makeTensorInfo(c,o.dtype,u)}var dD={kernelName:Za,backendName:"webgl",kernelFunc:QZ};function ZZ(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
${s.shape}`);let a=t.readSync(o.dataId),i=t.readSync(n.dataId),p=t.readSync(s.dataId),[u,c]=sh(a,o.shape,o.dtype,i,p);return t.makeTensorInfo(c,o.dtype,u)}var hD={kernelName:Ja,backendName:"webgl",kernelFunc:ZZ};function JZ(r){let{inputs:e,backend:t,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=e,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=I.calculateShapes(s,n,i),f=!1;if(s.dtype==="string"){let y=t.bufferSync(n),b=t.bufferSync(s),C=x.decodeString(t.readSync(a.dataId)[0]),w=D$(y,b,i,m,c,u,p,l,C,f);return t.makeTensorInfo(i,w.dtype,w.values)}let d=new vc(u,p,n.shape.length,s.shape.length,l,[m,1],f),h=t.runWebGLProgram(d,[s,n,a],s.dtype),g=J({inputs:{x:h},backend:t,attrs:{shape:i}});return t.disposeIntermediateTensorInfo(h),g}var gD={kernelName:ei,backendName:"webgl",kernelFunc:JZ};function e9(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=o,i=x.parseAxisParam(a,n.shape)[0],p=I.prepareSplitSize(n,s,i),u=n.shape.length,c=new Array(u).fill(0),l=n.shape.slice();return p.map(m=>{let f=[...l];f[i]=m;let d=ps({inputs:{x:n},backend:t,attrs:{begin:c,size:f}});return c[i]+=m,d})}var xD={kernelName:Ts,backendName:"webgl",kernelFunc:e9};var yD="return sqrt(x);",t9=he({opSnippet:yD,packedOpSnippet:yD,cpuKernelImpl:B$}),bD={kernelName:bo,backendName:"webgl",kernelFunc:t9};var r9="return x * x;",o9=he({opSnippet:r9}),CD={kernelName:ti,backendName:"webgl",kernelFunc:o9};var ID="return (a - b) * (a - b);",n9=ot({opSnippet:ID,packedOpSnippet:ID}),wD={kernelName:Co,backendName:"webgl",kernelFunc:n9};function s9({inputs:r,attrs:e,backend:t}){let{x:o}=r,n=Vt+`
return x > 0.0 ? 1.0 : float(${e.alpha});
`,s=new fr(o.shape,n);return t.runWebGLProgram(s,[o],o.dtype)}var SD={kernelName:$s,backendName:"webgl",kernelFunc:s9};var Sg=class{constructor(e,t,o){this.variableNames=["x"],this.outputShape=o;let n=o.length,s=_e(o.length),a=_e(o.length),i="";if(n===1)i="coords * strides + begin";else{let p=0;i=o.map((u,c)=>(p++,o.length===1?`coords * strides[${c}] + begin[${c}]`:`coords[${p-1}] * strides[${c}] + begin[${c}]`)).join(",")}this.userCode=`
${s} begin = ${s}(${e});
${s} strides = ${s}(${t});
void main() {
${a} coords = getOutputCoords();
setOutput(getX(${i}));
}
`}};function a9(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:f,finalShape:d,isIdentity:h,sliceDim0:g,isSimpleSlice:y,begin:b,end:C,strides:w}=et.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=J({inputs:{x:n},backend:t,attrs:{shape:d}});else if(g||y){x.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let E=et.computeOutShape(b,C,w),R=ps({inputs:{x:n},backend:t,attrs:{begin:b,size:E}});k=J({inputs:{x:R},backend:t,attrs:{shape:d}}),t.disposeIntermediateTensorInfo(R)}else if(t.shouldExecuteOnCPU([n])){let R=t.readSync(n.dataId),A=ne(n.shape,n.dtype,R),D=V$(f,A,w,b);k=t.makeTensorInfo(d,n.dtype,D.values)}else{let R=new Sg(b,w,f);k=t.runWebGLProgram(R,[n],n.dtype)}let _=J({inputs:{x:k},backend:t,attrs:{shape:d}});return t.disposeIntermediateTensorInfo(k),_}var vD={kernelName:Yn,backendName:"webgl",kernelFunc:a9};function i9(r){let{inputs:e,backend:t,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=e,m=t.readSync(c.dataId),f=t.readSync(l.dataId),[d,h]=z$(m,f,n,s,a,i,p,u);return[t.makeTensorInfo([d.length],"string",d),t.makeTensorInfo(l.shape,"int32",h)]}var kD={kernelName:Ns,backendName:"webgl",kernelFunc:i9};function u9(r){let{inputs:e,backend:t,attrs:o}=r,{skipEmpty:n}=o,{input:s,delimiter:a}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(s.shape.length!==1)throw new Error(`Input must be a vector, got shape: ${s.shape}`);if(a.shape.length!==0)throw new Error(`Delimiter must be a scalar, got shape: ${a.shape}`);let i=t.readSync(s.dataId),p=t.readSync(a.dataId)[0],[u,c,l]=W$(i,p,n),m=c.length;return[t.makeTensorInfo([m,2],"int32",u),t.makeTensorInfo([m],"string",c),t.makeTensorInfo([2],"int32",new Int32Array(l))]}var TD={kernelName:ri,backendName:"webgl",kernelFunc:u9};function 
p9(r){let{inputs:e,backend:t,attrs:o}=r,{numBuckets:n}=o,{input:s}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(n<=0)throw new Error("Number of buckets must be at least 1");let a=t.readSync(s.dataId),i=U$(a,n);return t.makeTensorInfo(s.shape,"int32",i)}var ND={kernelName:oi,backendName:"webgl",kernelFunc:p9};var c9="return tan(x);",l9=he({opSnippet:c9}),_D={kernelName:xa,backendName:"webgl",kernelFunc:l9};var m9=`
float e2x = exp(-2.0 * abs(x));
return sign(x) * (1.0 - e2x) / (1.0 + e2x);
`,f9=he({opSnippet:m9}),ED={kernelName:Qn,backendName:"webgl",kernelFunc:f9};var vg=class{constructor(e,t){this.variableNames=["A"];let o=new Array(e.length);for(let a=0;a<o.length;a++)o[a]=e[a]*t[a];this.outputShape=o,this.rank=o.length;let n=_e(this.rank),s=d9(e);this.userCode=`
void main() {
${n} resRC = getOutputCoords();
setOutput(getA(${s}));
}
`}};function d9(r){let e=r.length;if(e>5)throw Error(`Tile for rank ${e} is not yet supported`);if(e===1)return`imod(resRC, ${r[0]})`;let t=["resRC.x","resRC.y","resRC.z","resRC.w","resRC.u"],o=[];for(let n=0;n<r.length;n++)o.push(`imod(${t[n]}, ${r[n]})`);return o.join()}function Hw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{reps:s}=o;if(n.dtype==="string"||n.shape.length>5){let p=t.readSync(n.dataId),u=n.dtype==="string"?p.map(m=>x.decodeString(m)):p,c=ne(n.shape,n.dtype,u),l=H$(c,s);return t.makeTensorInfo(l.shape,l.dtype,l.values)}let a=new vg(n.shape,s);return t.runWebGLProgram(a,[n],n.dtype)}var $D={kernelName:wo,backendName:"webgl",kernelFunc:Hw};var kg=class{constructor(e){this.variableNames=["x","indices"],this.customUniforms=[{name:"n",type:"int"},{name:"firstPass",type:"int"},{name:"negativeInf",type:"float"},{name:"dir",type:"int"},{name:"inc",type:"int"}],this.outputShape=e,this.userCode=`
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int elemIdx = coords[1];
// We compare elements pair-wise within a group of size 2 * inc.
// The comparing rule for each group alternates between ascending
// and descending. Within each group, we compare each pair at
// positions i and i+inc. To decide whether an element at position i
// is x0 or x1, we mod it by 2 * inc, if the result is smaller than
// inc, it is in the first half of the group, we denote it as x0,
// otherwise we denote it as x1.
// For example, as shown in the Bitonic top K paper referenced above,
// Figure5(a) shows that element[1] is in the
// second half of the group when group size is 2, but it is in the
// first half of the group when group size is 4.
bool isFirstInPair = imod(elemIdx, 2 * inc) < inc;
int i = isFirstInPair ? elemIdx : elemIdx - inc;
int i0 = firstPass == 1 ? i : int(getIndices(batch, i));
int i1 = firstPass == 1 ? i + inc : int(getIndices(batch, i + inc));
float x0 = i0 < n ? getX(batch, i0) : negativeInf;
float x1 = i1 < n ? getX(batch, i1) : negativeInf;
// Denotes which direction indices are in (ascending or descending).
bool reverse = imod(elemIdx, 2 * dir) >= dir;
bool isGreater = x0 > x1 || (x0 == x1 && i1 > i0);
if (reverse == isGreater) { // Elements in opposite order of direction
int iTemp = i0;
i0 = i1;
i1 = iTemp;
}
if (isFirstInPair) {
setOutput(float(i0));
} else {
setOutput(float(i1));
}
}
`}},Tg=class{constructor(e){this.variableNames=["x","indices"],this.customUniforms=[{name:"n",type:"int"},{name:"firstPass",type:"int"},{name:"k",type:"int"}],this.outputShape=e,this.userCode=`
void main() {
// Takes max of indices (0, k), (1, k + 1), (2, k + 2) ...
ivec2 coords = getOutputCoords();
int batch = coords[0];
int elemIdx = coords[1];
// The output size is half of the previous size.
// If the previous sequence is | | | | _ _ _ _ | | | | _ _ _ _ (k=4),
// we only need to output the indices at positions |, the indices at
// positions _ can be thrown away, see Figure5(b) After Phase 2
// (Merge phase) in the Bitonic Top K paper referenced above.
// For example, the paper shows we only need to output the orange bars.
// The output sequence should look like this | | | | | | | |.
// Because the sequence is halved, to map the output index back
// to the previous sequence to find the corresponding value,
// we need to double the index. When we double the index,
// we basically interpolate a position, so 2i looks like
// | _ | _ | _ | _ | _ | _ | _. We move the | to the first k position
// of each 2k positions by - elemIdx % k. E.g. for output at
// index 4,5,6,7, we want to get the corresponding element at
// original index 8,9,10,11, for output at index 8,9,10,11,
// we want to get the corresponding element at original index
// 16,17,18,19, so on and so forth.
int i = elemIdx < k ? elemIdx : (elemIdx * 2 - imod(elemIdx, k));
int i0 = firstPass == 1 ? i : int(getIndices(batch, i));
int i1 = firstPass == 1 ? i + k : int(getIndices(batch, i + k));
float x0 = getX(batch, i0);
float x1 = i1 < n ? getX(batch, i1) : x0;
setOutput(x0 >= x1 ? float(i0) : float(i1));
}
`}};function Vu(r,e){e!==null&&r.disposeIntermediateTensorInfo(e)}function RD(r){let e=1;for(;e<r;)e*=2;return e}function h9(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{k:s,sorted:a}=o,i=P().getNumber("TOPK_LAST_DIM_CPU_HANDOFF_SIZE_THRESHOLD"),p=P().getNumber("TOPK_K_CPU_HANDOFF_THRESHOLD"),u=n.shape,c=u[u.length-1];if(t.shouldExecuteOnCPU([n])||c<i||s>p){let D=t.readSync(n.dataId),[O,M]=q$(D,u,n.dtype,s,a);return[t.makeTensorInfo(O.shape,O.dtype,O.values),t.makeTensorInfo(M.shape,M.dtype,M.values)]}if(s===0)return u[u.length-1]=0,[t.makeTensorInfo(u,n.dtype,[]),t.makeTensorInfo(u,"int32",[])];if(c===1)return[n,Ba({attrs:{shape:u,dtype:"int32",value:0},backend:t})];let l=t.texData.get(n.dataId),m=l!==null&&l.isPacked,f=m?t.unpackTensor(n):n,h=x.sizeFromShape(u)/c,g=J({inputs:{x:f},attrs:{shape:[h,c]},backend:t});m&&Vu(t,f);let y=RD(s),b=RD(c),C=null,w=()=>C===null?[g,g]:[g,C],k=(D,O,M)=>{let L=w(),W=new kg(M),G=[[c],[C===null?1:0],[Number.NEGATIVE_INFINITY],[D],[O]],q=C;C=t.runWebGLProgram(W,L,"int32",G),Vu(t,q)};for(let D=1;D<y;D*=2){let O=D*2;for(let M=D;M>=1;M/=2)k(O,M,[h,b])}for(let D=b;D>y;D/=2){let O=w(),M=new Tg([h,D/2]),W=[[c],[C===null?1:0],[y]],V=C;C=t.runWebGLProgram(M,O,"int32",W),Vu(t,V);let G=y/2,q=G*2;for(let H=G;H>=1;H/=2)k(q,H,C.shape)}let _=C;C=ps({inputs:{x:C},backend:t,attrs:{begin:0,size:[h,s]}}),Vu(t,_);let E=Lw({inputs:{x:g,indices:C},backend:t,attrs:{axis:1,batchDims:1}});Vu(t,g);let R=u.slice(0,-1);R.push(s),_=C,C=J({inputs:{x:C},attrs:{shape:R},backend:t}),Vu(t,_);let A=E;return E=J({inputs:{x:E},attrs:{shape:R},backend:t}),Vu(t,A),[E,C]}var AD={kernelName:Zn,backendName:"webgl",kernelFunc:h9};var Ng=class{constructor(e,t,o,n,s,a){this.variableNames=["Image","Transforms"],this.outputShape=a;let i=o==="nearest"?1:2,p;switch(n){case"constant":p=1;break;case"reflect":p=2;break;case"wrap":p=3;break;case"nearest":p=4;break;default:p=1;break}this.userCode=`
float mapCoord(float outCoord, float len) {
float inCoord = outCoord;
if(${p} == 2) {
if (inCoord < 0.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
float sz2 = 2.0 * len;
if (inCoord < sz2) {
inCoord = sz2 * float(int(float(-inCoord / sz2))) +
inCoord;
}
inCoord = inCoord < -len ? inCoord + sz2 : -inCoord - 1.0;
}
} else if (inCoord > len - 1.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
float sz2 = 2.0 * len;
inCoord -= sz2 * float(int(float(inCoord / sz2)));
if (inCoord >= len) {
inCoord = sz2 - inCoord - 1.0;
}
}
}
return clamp(inCoord, 0.0, len - 1.0);
} else if (${p} == 3) {
if (inCoord < 0.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
float sz = len - 1.0;
inCoord += len * (float(int(float(-inCoord / sz))) + 1.0);
}
} else if (inCoord > len - 1.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
float sz = len - 1.0;
inCoord -= len * float(int(float(inCoord / sz)));
}
}
return clamp(inCoord, 0.0, len - 1.0);
} else if (${p} == 4) {
return clamp(outCoord, 0.0, len - 1.0);
} else {
return outCoord;
}
}
float readWithFillValue(int batch, int coordY, int coordX,
int channel) {
float outputValue;
if (0 <= coordY && coordY < ${e} && 0 <= coordX && coordX < ${t}) {
outputValue = getImage(batch, coordY, coordX, channel);
} else {
outputValue = float(${s});
}
return outputValue;
}
void main() {
ivec4 coords = getOutputCoords();
float outputValue;
int batch = coords[0];
int x = coords[2];
int y = coords[1];
int channel = coords[3];
float xf = float(x);
float yf = float(y);
float a1 = getTransforms(batch, 0);
float a2 = getTransforms(batch, 1);
float a3 = getTransforms(batch, 2);
float b1 = getTransforms(batch, 3);
float b2 = getTransforms(batch, 4);
float b3 = getTransforms(batch, 5);
float c1 = getTransforms(batch, 6);
float c2 = getTransforms(batch, 7);
float projection = c1 * xf + c2 * yf + 1.0;
if (projection == 0.0) {
outputValue = float(${s});
} else {
float inX = (a1 * xf + a2 * yf + a3) / projection;
float inY = (b1 * xf + b2 * yf + b3) / projection;
float mapX = mapCoord(inX, float(${t}));
float mapY = mapCoord(inY, float(${e}));
if (${i} == 1) {
int coordY = int(round(mapY));
int coordX = int(round(mapX));
outputValue = readWithFillValue(batch, coordY, coordX,
channel);
} else {
float yFloor = floor(mapY);
float xFloor = floor(mapX);
float yCeil = yFloor + 1.0;
float xCeil = xFloor + 1.0;
float valueYFloor = (xCeil - mapX) *
readWithFillValue(batch, int(yFloor), int(xFloor), channel) +
(mapX - xFloor) *
readWithFillValue(batch, int(yFloor), int(xCeil), channel);
float valueYCeil = (xCeil - mapX) *
readWithFillValue(batch, int(yCeil), int(xFloor), channel) +
(mapX - xFloor) *
readWithFillValue(batch, int(yCeil), int(xCeil), channel);
outputValue = (yCeil - mapY) * valueYFloor +
(mapY - yFloor) * valueYCeil;
}
}
setOutput(outputValue);
}
`}};function g9(r){let{inputs:e,backend:t,attrs:o}=r,{image:n,transforms:s}=e,{interpolation:a,fillMode:i,fillValue:p,outputShape:u}=o,[c,l,m,f]=n.shape,[d,h]=u!=null?u:[l,m],g=[c,d,h,f],y=new Ng(l,m,a,i,p,g);return t.runWebGLProgram(y,[n,s],"float32")}var FD={kernelName:Jn,backendName:"webgl",kernelFunc:g9};function x9(r){let{inputs:e,attrs:t,backend:o}=r,{axis:n}=t,{x:s}=e;as(s,"unique"),console.warn("WARNING: ","UI might be locked temporarily as data is being downloaded");let a=o.readSync(s.dataId),{outputValues:i,outputShape:p,indices:u}=K$(a,n,s.shape,s.dtype);return[o.makeTensorInfo(p,s.dtype,i),o.makeTensorInfo([u.length],"int32",u)]}var DD={kernelName:$p,backendName:"webgl",kernelFunc:x9};function y9(r){let{inputs:e,backend:t,attrs:o}=r,{value:n}=e,{axis:s}=o;s<0&&(s+=n.shape.length);let a=n,i=a.shape.length,p=n.shape[s],u=new Array(i-1),c=0;for(let h=0;h<i;h++)h!==s&&(u[c++]=a.shape[h]);let l=[],m=new Array(i).fill(0),f=a.shape.slice();f[s]=1;let d=new Array(p);for(let h=0;h<d.length;h++){m[s]=h;let g=ps({inputs:{x:a},backend:t,attrs:{begin:m,size:f}}),y=J({inputs:{x:g},backend:t,attrs:{shape:u}});d[h]=y,l.push(g)}return l.forEach(h=>t.disposeIntermediateTensorInfo(h)),d}var PD={kernelName:_s,backendName:"webgl",kernelFunc:y9};var _g=class{constructor(e,t){this.variableNames=["x","segmentIds"];let o=e.windowSize,n=e.batchSize,s=e.inSize,a=e.numSegments,i=a*Math.ceil(s/o);this.outputShape=[n,i];let p="0.0",u="sumValue",c=Math.floor(o/4)*4,l=o%4,m=`
sumValue += dot(values, segFilter);
`,f="";s%o>0&&(f=`
if (inIdx < 0 || inIdx >= ${s}) {
return initializationValue;
}
`);let d="";s%o>0&&(d=`
if (inIdx < 0 || inIdx >= ${s}) {
return -1.0;
}
`),this.userCode=`
const float initializationValue = ${p};
float getValue(int batch, int inIdx) {
${f}
return getX(batch, inIdx);
}
float getSegmentIdAtIndex(int inIdx) {
${d}
return getSegmentIds(inIdx);
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = int(floor(float(outIdx) / float(
${a})) * float(${o}));
int currentSeg = int(mod(float(outIdx), float(${a})));
float sumValue = 0.0;
for (int i = 0; i < ${c}; i += 4) {
int inIdx = inOffset + i;
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
getValue(batch, inIdx + 3)
);
vec4 segFilter = vec4(
int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
int(getSegmentIdAtIndex(inIdx + 2)) == currentSeg ? 1 : 0,
int(getSegmentIdAtIndex(inIdx + 3)) == currentSeg ? 1 : 0
);
${m}
}
int inIdx = inOffset + ${c};
if (${l===1}) {
vec4 values = vec4(
getValue(batch, inIdx),
initializationValue,
initializationValue,
initializationValue
);
int inIdxSeg = int(getSegmentIdAtIndex(inIdx));
vec4 segFilter = vec4(
int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
0,
0,
0
);
${m}
} else if (${l===2}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
initializationValue,
initializationValue
);
vec4 segFilter = vec4(
int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
0,
0
);
${m}
} else if (${l===3}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
initializationValue
);
vec4 segFilter = vec4(
int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
int(getSegmentIdAtIndex(inIdx + 2)) == currentSeg ? 1 : 0,
0
);
${m}
}
setOutput(${u});
}
`}};function b9(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,segmentIds:s}=e,{numSegments:a}=o,i=n.shape.length,p=[],u=0,c=I.getAxesPermutation([u],i),l=n;c!=null&&(l=xt({inputs:{x:n},backend:t,attrs:{perm:c}}),p.push(l),u=I.getInnerMostAxes(1,i)[0]);let m=I.segment_util.computeOutShape(l.shape,u,a),f=x.sizeFromShape([l.shape[u]]),d=J({inputs:{x:l},backend:t,attrs:{shape:[-1,f]}});p.push(d);let h=Ca(n.dtype),g=(w,k,_,E,R)=>{let A=w.shape[0],D=w.shape[1],O=I.segment_util.segOpComputeOptimalWindowSize(D,R),M={windowSize:O,inSize:D,batchSize:A,numSegments:R},L=new _g(M,k),W=t.compileAndRun(L,[w,_],E);if(p.push(W),W.shape[1]===R)return W;let V=Gw({backend:t,attrs:{start:0,stop:R,step:1,dtype:"float32"}}),G=Hw({inputs:{x:V},backend:t,attrs:{reps:[D/O]}});return p.push(V),p.push(G),g(W,k,G,E,R)},y=g(d,"unsortedSegmentSum",s,h,a),b=J({inputs:{x:y},backend:t,attrs:{shape:m}}),C=b;if(c!=null){p.push(b);let w=I.getUndoAxesPermutation(c);C=xt({inputs:{x:C},backend:t,attrs:{perm:w}})}return p.forEach(w=>t.disposeIntermediateTensorInfo(w)),C}var OD={kernelName:Rp,backendName:"webgl",kernelFunc:b9};var C9=[IR,SR,vR,kR,NR,_R,ER,$R,FR,DR,PR,OR,MR,LR,BR,VR,zR,WR,UR,GR,HR,KR,jR,XR,JR,tA,rA,lR,nA,aA,iA,uA,pA,cA,lA,mA,fA,dA,hA,yA,bA,CA,IA,wA,SA,vA,kA,TA,NA,_A,EA,$A,RA,AA,FA,PA,OA,MA,LA,VA,zA,WA,UA,GA,HA,qA,KA,jA,cR,XA,sA,YA,QA,ZA,mR,JA,eF,tF,rF,oF,nF,sF,aF,iF,uF,cF,lF,mF,fF,dF,hF,xF,bF,CF,IF,wF,SF,_F,hR,EF,$F,RF,AF,YR,FF,OF,MF,LF,BF,fR,VF,zF,WF,UF,GF,QR,vF,HF,qF,KF,xR,jF,XF,YF,QF,ZF,JF,eD,tD,rD,oD,nD,sD,aD,iD,uD,pD,qR,NF,cD,lD,mD,fD,dD,hD,gD,xD,bD,CD,wD,SD,vD,kD,TD,ND,TF,bR,_D,ED,$D,AD,FD,CR,DD,PD,OD,DF];for(let r of C9)ya(r);var Ae;(function(r){r[r.float32=0]="float32",r[r.int32=1]="int32",r[r.bool=2]="bool",r[r.string=3]="string",r[r.complex64=4]="complex64"})(Ae||(Ae={}));var $i;(function(r){r[r.linear=0]="linear",r[r.relu=1]="relu",r[r.relu6=2]="relu6",r[r.prelu=3]="prelu",r[r.leakyrelu=4]="leakyrelu",r[r.sigmoid=5]="sigmoid",r[r.elu=6]="elu"})($i||($i={}));var MD;function 
I9(r){MD=r.wasm.cwrap(Fo,null,["number","array","number","number","array","number","number","number","number","number","number","number","number"])}function w9(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=e;if(n.dtype!=="float32"||s.dtype!=="float32")throw new Error("_FusedMatMul for non non-float32 tensors not yet supported.");let{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o,m=t.dataIdMap.get(n.dataId).id,f=t.dataIdMap.get(s.dataId).id,d=0;if(a!=null){let R=t.dataIdMap.get(a.dataId);if(R.shape.length!==1)throw new Error(`_FusedMatMul only supports rank-1 bias but got rank ${R.shape.length}.`);d=R.id}let h=i==null?0:t.dataIdMap.get(i.dataId).id,g=$i[c];if(g==null)throw new Error(`${c} activation not yet supported for FusedConv2D in the wasm backend.`);let y=p?n.shape[2]:n.shape[1],b=u?s.shape[1]:s.shape[2],C=br.assertAndGetBroadcastShape(n.shape.slice(0,-2),s.shape.slice(0,-2)),w=t.makeOutput([...C,y,b],n.dtype),k=t.dataIdMap.get(w.dataId).id,_=new Uint8Array(new Int32Array(n.shape).buffer),E=new Uint8Array(new Int32Array(s.shape).buffer);return MD(m,_,n.shape.length,f,E,s.shape.length,p,u,g,d,h,l||0,k),w}var LD={kernelName:Fo,backendName:"wasm",setupFunc:I9,kernelFunc:w9};function Qe(r,e){let t;function o(s){t=s.wasm.cwrap(r,null,["number","number","number"])}function n(s){let{backend:a,inputs:{x:i}}=s,p=a.dataIdMap.get(i.dataId).id,u=a.makeOutput(i.shape,e||i.dtype),c=a.dataIdMap.get(u.dataId).id;return x.sizeFromShape(u.shape)===0||t(p,Ae[i.dtype],c),u}return{kernelName:r,backendName:"wasm",setupFunc:o,kernelFunc:n}}var BD=Qe(sn);function nt(r,e,t){let o;function n(a){o=a.wasm.cwrap(r,null,["number","array","number","number","array","number","number","number"])}function s(a){let{backend:i,inputs:p}=a,{a:u,b:c}=p,l=i.dataIdMap.get(u.dataId).id,m=i.dataIdMap.get(c.dataId).id,f=t!=null?t:u.dtype,d=I.assertAndGetBroadcastShape(u.shape,c.shape),h=i.makeOutput(d,f);if(x.sizeFromShape(d)===0)return h;let g=new 
Uint8Array(new Int32Array(u.shape).buffer),y=new Uint8Array(new Int32Array(c.shape).buffer),b=i.dataIdMap.get(h.da
${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape
${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=e.dataIdMap.get(o.dataId).id,i=e.dataIdMap.get(n.dataId).id,p=e.dataIdMap.get(s.dataId).id,u=o.shape[0],c=x.sizeFromShape(s.shape),l=e.makeOutput([u,c],o.dtype),m=e.dataIdMap.get(l.dataId).id,f=e.makeOutput([c],s.dtype),d=e.dataIdMap.get(f.dataId).id,h=e.makeOutput([3],"int32"),g=e.dataIdMap.get(h.dataId).id;s3(a,i,p,u,m,d,g);let y=e.readSync(h.dataId),b;switch(y[0]){case 0:{b=I.getSparseReshapeMultipleNegativeOneOutputDimErrorMessage(y[1],y[2]);break}case 1:{b=I.getSparseReshapeNegativeOutputDimErrorMessage(y[1],y[2]);break}case 2:b=I.getSparseReshapeEmptyTensorZeroOutputDimErrorMessage();break;case 3:{let C=Array.from(e.readSync(n.dataId)),w=Array.from(e.readSync(f.dataId));b=I.getSparseReshapeInputOutputMultipleErrorMessage(C,w);break}case 4:{let C=Array.from(e.readSync(n.dataId)),w=Array.from(e.readSync(f.dataId));b=I.getSparseReshapeInputOutputMismatchErrorMessage(C,w);break}default:b=""}if(e.disposeData(h.dataId),b)throw e.disposeData(l.dataId),e.disposeData(f.dataId),new Error(b);return[l,f]}
/** Kernel config binding `Tee`/`Nee` to kernel `ga` on the "wasm" backend. */
var a3={kernelName:ga,backendName:"wasm",setupFunc:Tee,kernelFunc:Nee};
/** Holds the wasm "SparseSegmentReduction" function once `Rg` has run. */
var i3;
/** Setup hook: binds the wasm export "SparseSegmentReduction" (nine numeric arguments) to `i3`. */
function Rg(r){i3=r.wasm.cwrap("SparseSegmentReduction",null,["number","number","number","number","number","number","number","number","number"])}
/**
 * Shared implementation behind the two kernels registered as `Za` and `Ja`.
 *
 * `r` carries `backend` and `inputs` ({data, indices, segmentIds}); `e` is forwarded unchanged
 * as the eighth argument of the wasm call (presumably selecting mean vs. sum -- confirm
 * against the wasm source).
 * The first output dimension is the last segment id + 1, or 0 when `indices` is empty.
 * After the call a 4-element int32 status tensor is read back: codes 0-3 select an error
 * message, any other code means success. The status tensor is always disposed, and the
 * output is disposed too before an error is thrown.
 */
function Ag(r,e){let{backend:t,inputs:o}=r,{data:n,indices:s,segmentIds:a}=o,i=s.shape[0],p=t.readSync(a.dataId,i-1,i)[0],c=i>0?p+1:0;if(c<0)throw new Error(I.getSparseSegmentReductionNegativeSegmentIdsErrorMessage());let l=n.shape.slice();l[0]=c;let m=t.dataIdMap.get(n.dataId).id,f=t.dataIdMap.get(s.dataId).id,d=t.dataIdMap.get(a.dataId).id,h=t.makeOutput(l,n.dtype),g=t.dataIdMap.get(h.dataId).id,y=t.makeOutput([4],"int32"),b=t.dataIdMap.get(y.dataId).id;i3(m,Ae[n.dtype],n.shape[0],f,d,g,b,e,0);let C=t.readSync(y.dataId),w;switch(C[0]){case 0:{w=I.getSparseSegmentReductionNegativeSegmentIdsErrorMessage();break}case 
1:{w=I.getSparseSegmentReductionNonIncreasingSegmentIdsErrorMessage();break}case 2:w=I.getSparseSegmentReductionSegmentIdOutOfRangeErrorMessage(C[1],C[2]);break;case 3:w=I.getSparseSegmentReductionIndicesOutOfRangeErrorMessage(C[1],C[2],C[3]);break;default:w=""}if(t.disposeData(y.dataId),w)throw t.disposeData(h.dataId),new Error(w);return h}
/** Kernel function for `Za`: `Ag` with its flag set to true. */
function _ee(r){return Ag(r,!0)}var u3={kernelName:Za,backendName:"wasm",setupFunc:Rg,kernelFunc:_ee};
/** Kernel function for `Ja`: `Ag` with its flag set to false. */
function Eee(r){return Ag(r,!1)}var p3={kernelName:Ja,backendName:"wasm",setupFunc:Rg,kernelFunc:Eee};
/**
 * Kernel function for `Ts` on the "wasm" backend: splits `x` along one axis.
 * The split sizes come from `I.prepareSplitSize`; each piece is produced with `Xo` (a slice)
 * and the running `begin` offset along the axis advances by the piece size.
 */
function $ee(r){let{inputs:e,attrs:t,backend:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=t,i=x.parseAxisParam(a,n.shape)[0],p=I.prepareSplitSize(n,s,i),u=new Array(n.shape.length).fill(0),c=n.shape.slice();return p.map(l=>{let m=[...c];m[i]=l;let f=Xo({inputs:{x:n},attrs:{begin:u,size:m},backend:o});return u[i]+=l,f})}var c3={kernelName:Ts,backendName:"wasm",kernelFunc:$ee};
/** Unary wasm kernels built by `Qe` for `bo` and `ti`; binary wasm kernel built by `nt` for `Co`. */
var l3=Qe(bo);var m3=Qe(ti);var Ree=!0,f3=nt(Co,Ree);
/** Holds the wasm function for kernel `$s` once `Aee` has run. */
var d3;function Aee(r){d3=r.wasm.cwrap($s,null,["number","number","number","number"])}
/** Kernel function for `$s`: passes the input id, the `alpha` attr, the dtype code and the output id to wasm. */
function Fee(r){let{backend:e,inputs:t,attrs:o}=r,{alpha:n}=o,{x:s}=t,a=e.dataIdMap.get(s.dataId).id,i=e.makeOutput(s.shape,s.dtype),p=e.dataIdMap.get(i.dataId).id;return d3(a,n,Ae[s.dtype],p),i}var h3={kernelName:$s,backendName:"wasm",setupFunc:Aee,kernelFunc:Fee};
/** Holds the wasm function for kernel `Yn` once `Dee` has run. */
var g3;function Dee(r){g3=r.wasm.cwrap(Yn,null,["number","array","number","array","array","array","array","array","number","number"])}function Pee(r){let{backend:e,inputs:t,attrs:o}=r,{x:n}=t,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:f,finalShape:d,isIdentity:h,sliceDim0:g,isSimpleSlice:y,begin:b,end:C,strides:w}=et.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=Mt({inputs:{x:n},backend:e,attrs:{shape:d}});else if(g||y){x.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let 
_=et.computeOutShape(b,C,w),E=Xo({inputs:{x:n},backend:e,attrs:{begin:b,size:_}});k=Mt({inputs:{x:E},backend:e,attrs:{shape:d}}),e.disposeData(E.dataId)}else{let _=e.makeOutput(f,"float32"),E=
${Ri()}
fn _start(@builtin(local_invocation_id) LocalId : vec3<u32>,
@builtin(global_invocation_id) GlobalId : vec3<u32>,
@builtin(num_workgroups) NumWorkgroups : vec3<u32>) {
localId = LocalId;
globalId = GlobalId;
numWorkgroups = NumWorkgroups;
main();
}
fn main()
`;break;case 1:e=`
${Ri()}
fn _start(@builtin(local_invocation_id) LocalId : vec3<u32>,
@builtin(global_invocation_id) GlobalId : vec3<u32>,
@builtin(num_workgroups) NumWorkgroups : vec3<u32>) {
localId = LocalId;
globalId = GlobalId;
numWorkgroups = NumWorkgroups;
main(getGlobalIndex());
}
fn main(${r[0]} : i32)
`;break;default:throw Error("Unreachable")}return e}function Ri(){return`
@compute @workgroup_size(workGroupSizeX, workGroupSizeY, workGroupSizeZ)
`}function ite(r,e,t){let o=[];if(o.push(`
const workGroupSizeX = ${t.workGroupSize[0]}u;
const workGroupSizeY = ${t.workGroupSize[1]}u;
const workGroupSizeZ = ${t.workGroupSize[2]}u;
var<private> localId: vec3<u32>;
var<private> globalId: vec3<u32>;
var<private> numWorkgroups: vec3<u32>;
// Only used when the y/z dimension of workgroup size is 1.
fn getGlobalIndex() -> i32 {
${eM(t)?" return i32(globalId.x);":` let localInvocationIndex = localId.z * workGroupSizeX * workGroupSizeY +
localId.y * workGroupSizeX + localId.x;
let workGroupID = (globalId - localId)/vec3<u32>(
workGroupSizeX, workGroupSizeY, workGroupSizeZ);
return i32((workGroupID.z * numWorkgroups.x * numWorkgroups.y +
workGroupID.y * numWorkgroups.x + workGroupID.x) *
(workGroupSizeX * workGroupSizeY * workGroupSizeZ) +
localInvocationIndex);
`}
}
`),t.isFromPixels)return o.push(`
struct Uniform {
size : i32,
numChannels : i32,
outShapeStrides : vec2<i32>,
};
@group(0) @binding(0) var<storage, read_write> result: array<${Tc(e.dtype,t.isVec4)}>;
@group(0) @binding(2) var<uniform> uniforms: Uniform;
`),[Y3,o.join(`
`),Q3(e.shape),t.getUserCode()].join(`
`);let n="struct Uniforms { NAN : f32, ";t.variableNames.forEach((m,f)=>{let d=At(r[f].shape.length);n+=`${m.charAt(0).toLowerCase()+m.slice(1)}Shape : ${d}, `});let s=At(e.shape.length);n+=`outShape : ${s}, `;let a=e.shape.length-1,i=At(a);n+=`
outShapeStrides: ${i}, `,t.size&&(n+="size : i32, "),t.uniforms&&(n+=t.uniforms),n+="};",n=dte(n),o.push(n),t.atomic?o.push(`
@group(0) @binding(0) var<storage, read_write> result: array<atomic<i32>>;
`):o.push(`
@group(0) @binding(0) var<storage, read_write> result: array<${Tc(e.dtype,t.isVec4)}>;
`),t.variableNames.forEach((m,f)=>{o.push(`
@group(0) @binding(${1+f}) var<storage, read> ${m}: array<${t.variableTypes?t.variableTypes[f]:Tc(r[f].dtype,t.isVec4)}>;
`)}),n!==""&&o.push(`
@group(0) @binding(${1+t.variableNames.length}) var<uniform> uniforms: Uniforms;
`);let p=lte(e.shape,t.dispatchLayout),u=[Y3,o.join(`
`),Q3(e.shape),p,mte(e.shape.length)];t.atomic||u.push(fte(e.shape,e.dtype,t.isVec4));let c=r.map((m,f)=>cte(m,e.shape,t.variableTypes?t.variableTypes[f]==="vec4<f32>":t.isVec4,t.dispatchLayout.x.length===e.shape.length)).join(`
`);return u.push(c),u.push(t.getUserCode()),u.join(`
`)}function J3(r,e,t,o){let n=r.shaderKey;if(r.isFromPixels)return n;let s=t.map(c=>c.dtype).concat(o.dtype),a=t.map(c=>I.getBroadcastDims(c.shape,o.shape)),i=t.map(c=>x.arraysEqual(c.shape,o.shape)).join("_"),p=a.map(c=>c.join("_")).join(";"),u=eM(r)?"flatDispatch":"";return n+="_"+(r.workGroupSize?r.workGroupSize.join(","):"")+e.map(c=>c.length).join(",")+s.join(",")+r.variableNames.join(",")+p+i+u,n}var Y3=`
struct vec5 {x: i32, y: i32, z: i32, w: i32, u: i32};
struct vec6 {x: i32, y: i32, z: i32, w: i32, u: i32, v: i32};
// Checks whether coordinates lie within the bounds of the shape.
fn coordsInBounds2D(coord : vec2<i32>, shape : vec2<i32>) -> bool {
return all(coord >= vec2<i32>(0)) && all(coord < shape);
}
fn coordsInBounds3D(coord : vec3<i32>, shape : vec3<i32>) -> bool {
return all(coord >= vec3<i32>(0)) && all(coord < shape);
}
fn coordsInBounds4D(coord : vec4<i32>, shape : vec4<i32>) -> bool {
return all(coord >= vec4<i32>(0)) && all(coord < shape);
}
fn getIndexFromCoords1D(coord : i32, shape : i32) -> i32 {
return coord;
}
fn getIndexFromCoords2D(coords : vec2<i32>, shape : vec2<i32>) -> i32 {
return dot(coords, vec2<i32>(shape.y, 1));
}
fn getIndexFromCoords3D(coords : vec3<i32>, shape : vec3<i32>) -> i32 {
return dot(coords, vec3<i32>(shape.y * shape.z, shape.z, 1));
}
fn getIndexFromCoords4D(coords : vec4<i32>, shape : vec4<i32>) -> i32 {
return dot(coords, vec4<i32>(
shape.y * shape.z * shape.w, shape.z * shape.w, shape.w, 1));
}
fn getIndexFromCoords5D(coords : vec5, shape : vec5) -> i32 {
let shapeStrides: vec5 = vec5(shape.y * shape.z * shape.w * shape.u, shape.z * shape.w * shape.u, shape.w * shape.u, shape.u, 1);
return coords.x*shapeStrides.x + coords.y*shapeStrides.y + coords.z*shapeStrides.z + coords.w*shapeStrides.w + coords.u*shapeStrides.u;
}
fn getIndexFromCoords6D(coords : vec6, shape : vec6) -> i32 {
let shapeStrides: vec6 = vec6(shape.y * shape.z * shape.w * shape.u * shape.v, shape.z * shape.w * shape.u * shape.v, shape.w * shape.u * shape.v, shape.u * shape.v, shape.v, 1);
return coords.x*shapeStrides.x + coords.y*shapeStrides.y + coords.z*shapeStrides.z + coords.w*shapeStrides.w + coords.u*shapeStrides.u + coords.v*shapeStrides.v;
}
fn idiv(a: i32, b: i32, sign: f32) -> i32 {
var res: i32 = a / b;
let modulo: i32 = a % b;
if (sign < 0. && modulo != 0) {
res = res - 1;
}
return res;
}
// NaN defination in IEEE 754-1985 is :
// - sign = either 0 or 1.
// - biased exponent = all 1 bits.
// - fraction = anything except all 0 bits (since all 0 bits represents infinity).
// https://en.wikipedia.org/wiki/IEEE_754-1985#Representation_of_non-numbers
fn isnan(val: f32) -> bool {
let floatToUint: u32 = bitcast<u32>(val);
return (floatToUint & 0x7fffffffu) > 0x7f800000u;
}
fn isnanVec4(val : vec4<f32>) -> vec4<bool> {
return vec4<bool>(isnan(val[0]), isnan(val[1]), isnan(val[2]), isnan(val[3]));
}
`;
/**
 * Generates the WGSL helper `getCoordsFromIndex` for an output of shape `r`.
 *
 * - Rank 0 or 1: the index is returned unchanged as an i32.
 * - Rank 2 (a single stride): d0 = index / stride and d1 is the remainder, using the scalar
 *   `uniforms.outShapeStrides`.
 * - Higher ranks: the index is peeled one dimension at a time against the components of
 *   `uniforms.outShapeStrides` (named through `Yo(i)`); the last stride also yields the
 *   final coordinate as the remainder.
 * The return type is whatever `At(rank)` names.
 */
function Q3(r){let e=r.length;if(e<=1)return"fn getCoordsFromIndex(index : i32) -> i32 { return index; }";let t=x.computeStrides(r),o=At(e),n=[];for(let a=0;a<e;a++)n.push(`d${a}`);if(t.length===1)return` fn getCoordsFromIndex(index : i32) -> vec2<i32> {
let d0 = index / uniforms.outShapeStrides; let d1 = index - d0 * uniforms.outShapeStrides;
return vec2<i32>(d0, d1);
}`;let s;return s="var index2 = index;"+t.map((a,i)=>{let p=`let ${n[i]} = index2 / uniforms.outShapeStrides.${Yo(i)}`,u=i===t.length-1?`let ${n[i+1]} = index2 - ${n[i]} * uniforms.outShapeStrides.${Yo(i)}`:`index2 = index2 - ${n[i]} * uniforms.outShapeStrides.${Yo(i)}`;return`${p}; ${u};`}).join(""),`
fn getCoordsFromIndex(index : i32) -> ${o} {
${s}
return ${o}(${n.join(",")});
}
`}function ute(r,e){let t=r.name,o=r.shape.length,n=At(o),s="get"+t.charAt(0).toUpperCase()+t.slice(1),a=["d0","d1","d2","d3","d4","d5"].slice(0,o),i=a.map(c=>`${c} : i32`).join(", ");if(o<1)return e?`
fn ${s}() -> vec4<f32> {
return vec4<f32>(${t}[0]);
}
`:`
fn ${s}() ->f32 {
return f32(${t}[0]);
}
`;let p=`uniforms.${t.charAt(0).toLowerCase()+t.slice(1)}Shape`,u=`${o}D`;return o===0&&(u="1D"),e?`
fn ${s}(${i}) -> vec4<f32> {
return vec4<f32>(${t}[getIndexFromCoords${u}(${n}(${a.join(",")}),
${p}) / 4]);
}
`:`
fn ${s}(${i}) -> f32 {
return f32(${t}[getIndexFromCoords${u}(${n}(${a.join(",")}),
${p})]);
}
`}function pte(r,e,t,o){let n=r.name,s=n.charAt(0).toUpperCase()+n.slice(1),a="get"+s+"ByOutput",i=r.shape.length,p=e.length,u=At(p);if(x.arraysEqual(r.shape,e)&&o)return t?`
fn ${a}Index(globalIndex : i32) -> vec4<f32> {
return vec4<f32>(${n}[globalIndex]);
}
fn ${a}Coords(coords : ${u}) -> vec4<f32> {
return vec4<f32>(${n}[${p>1?"getOutputIndexFromCoords(coords)":"coords"} / 4]);
}
`:`
fn ${a}Index(globalIndex : i32) -> f32 {
return f32(${n}[globalIndex]);
}
fn ${a}Coords(coords : ${u}) -> f32 {
return f32(${n}[${p>1?"getOutputIndexFromCoords(coords)":"coords"}]);
}
`;let c=I.getBroadcastDims(r.shape,e),l=p-i,m="";if(i===0)return t?`
fn ${a}Index(globalIndex : i32) -> vec4<f32> {
return get${s}();
}
fn ${a}Coords(coords : ${u}) -> vec4<f32> {
return get${s}();
}
`:`
fn ${a}Index(globalIndex : i32) -> f32{
return get${s}();
}
fn ${a}Coords(coords : ${u}) -> f32{
return get${s}();
}
`;p<2&&c.length>=1?m="coords = 0;":m=c.map(g=>`coords.${Yo(g+l)} = 0;`).join(`
`);let f="";if(p<2&&i>0)f="coords";else if(p>1){let g=At(i),y=r.shape.map((b,C)=>`coords.${Yo(C+l)}`).join(", ");f=`${g}(${y})`}else f="coords";let d=`uniforms.${n.charAt(0).toLowerCase()+n.slice(1)}Shape`,h=`${i}D`;return t?`
fn ${a}Index(globalIndex : i32) -> vec4<f32> {
var coords = getCoordsFromIndex(globalIndex);
${m}
return ${n}[getIndexFromCoords${h}(${f}, ${d}) / 4];
}
fn ${a}Coords(coordsIn : ${u}) -> vec4<f32> {
var coords = coordsIn;
${m}
return ${n}[getIndexFromCoords${h}(${f}, ${d}) / 4];
}
`:`
fn ${a}Index(globalIndex : i32) -> f32 {
var coords = getCoordsFromIndex(globalIndex);
${m}
return f32(${n}[getIndexFromCoords${h}(${f}, ${d})]);
}
fn ${a}Coords(coordsIn : ${u}) -> f32 {
var coords = coordsIn;
${m}
return f32(${n}[getIndexFromCoords${h}(${f}, ${d})]);
}
`}function cte(r,e,t,o){let n=ute(r,t);return r.shape.length<=e.length&&(n+=pte(r,e,t,o)),n}function lte(r,e){let{x:t,y:o=[],z:n=[]}=e,s=r.length,a=t.length+o.length+n.length;if(a!==s)return"";if(t.length===s)return`fn getOutputCoords() -> ${At(s)}{
let globalIndex = getGlobalIndex();
return getCoordsFromIndex(globalIndex);
}
`;let i="",p=[t,o,n];for(let m=0;m<p.length;m++){let f=p[m];if(f.length!==0)if(f.length===1)i+=`let d${f[0]} = i32(globalId[${m}]);`;else{let d=X3(f,"uniforms.outShape");i+=`var index${m} = i32(globalId[${m}]);`;for(let h=0;h<d.length;h++)i+=`let d${f[h]} = index${m} / ${d[h]};`,h===d.length-1?i+=`let d${f[h+1]} = index${m} - d${f[h]} * ${d[h]};`:i+=`index${m} = index${m} - d${f[h]} * ${d[h]};`}}let u=[];for(let m=0;m<a;m++)u.push(`d${m}`);let c=At(a),l=`fn getOutputCoords() -> ${c} {
${i}
`;return u.length===0?l+=`return ${c}(0); }`:l+=`return ${c}(${u.join(",")}); }`,l}function mte(r){let e="";switch(r){case 0:case 1:e+=`
fn getOutputIndexFromCoords(coords : i32) -> i32 {
return coords;
}
`;break;case 2:e+=`
fn getOutputIndexFromCoords(coords : vec2<i32>) -> i32 {
return dot(coords, vec2<i32>(uniforms.outShapeStrides, 1));
}
`;break;case 3:e+=`
fn getOutputIndexFromCoords(coords : vec3<i32>) -> i32 {
return dot(coords, vec3<i32>(uniforms.outShapeStrides.x, uniforms.outShapeStrides.y, 1));
}
`;break;case 4:e+=`
fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
return dot(coords, vec4<i32>(
uniforms.outShapeStrides.x, uniforms.outShapeStrides.y, uniforms.outShapeStrides.z, 1));
}
`;break;case 5:e+=`
fn getOutputIndexFromCoords(coords : vec5) -> i32 {
return coords.x * uniforms.outShapeStrides.x +
coords.y * uniforms.outShapeStrides.y +
coords.z * uniforms.outShapeStrides.z +
coords.w * uniforms.outShapeStrides.w +
coords.u;
}
`;break;case 6:e+=`
fn getOutputIndexFromCoords(coords : vec6) -> i32 {
return coords.x * uniforms.outShapeStrides.x +
coords.y * uniforms.outShapeStrides.y +
coords.z * uniforms.outShapeStrides.z +
coords.w * uniforms.outShapeStrides.w +
coords.u * uniforms.outShapeStrides.u +
coords.v;
}
`;break;default:x.assert(!1,()=>`Unsupported ${r}D shape`);break}return e}function eM(r){return r.dispatch[1]===1&&r.dispatch[2]===1}function Tc(r,e){return r==="float32"?e?"vec4<f32>":"f32":r==="int32"||r==="bool"?e?"vec4<i32>":"i32":r}function fte(r,e,t){let o=r.length,n=Tc(e,t),s;if(t?s=`fn setOutputAtIndex(flatIndex : i32, value : vec4<f32>) {
result[flatIndex] = ${n}(value);
}
fn setOutputAtIndexI32(flatIndex : i32, value : vec4<i32>) {
result[flatIndex] = ${n}(value);
}`:s=`fn setOutputAtIndex(flatIndex : i32, value : f32) {
result[flatIndex] = ${n}(value);
}
fn setOutputAtIndexI32(flatIndex : i32, value : i32) {
result[flatIndex] = ${n}(value);
}`,o>=2){let a=["d0","d1","d2","d3","d4","d5"].slice(0,o),i=At(o);t?s+=`
fn setOutputAtCoords(${a.map(p=>`${p} : i32`).join(", ")}, value : vec4<f32>) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndex(flatIndex / 4, value);
}
fn setOutputAtCoordsI32(${a.map(p=>`${p} : i32`).join(", ")}, value : vec4<i32>) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndexI32(flatIndex / 4, value);
}
`:s+=`
fn setOutputAtCoords(${a.map(p=>`${p} : i32`).join(", ")}, value : f32) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndex(flatIndex, value);
}
fn setOutputAtCoordsI32(${a.map(p=>`${p} : i32`).join(", ")}, value : i32) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndexI32(flatIndex, value);
}
`}return s}function dte(r){let e=/(\w+)\s*:\s*vec(5|6)/g;r=r.replace(e,o=>"@align(16) "+o);let t=/vec(5|6)\s*,\s*(\w+)/g;return r=r.replace(t,(o,n,s)=>`vec${n}, @align(16) ${s}`),r}var pS={};Be(pS,{ArrayBufferToTypedArray:()=>uS,GPUBytesPerElement:()=>iS,MatMulProgramType:()=>Qo,computeDispatch:()=>ae,computeWorkGroupInfoForMatMul:()=>aS,computeWorkGroupSizeForConv2d:()=>Hl,computeWorkPerThreadForConv2d:()=>ql,flatDispatchLayout:()=>fe,isWebGPUSupported:()=>Kl,tilesFitEvenlyIntoShape:()=>gte});var Wu=r=>{let e=1;for(let t=0;t<r.length;t++)e*=r[t];return e};function gte(r,e){if(r.length!==e.length)throw new Error(`Cannot compute whether rank ${r.length} tiles fit evenly into rank ${e.length} shape - ranks must match.`);return e.every((t,o)=>t%r[o]===0)}function ae(r,e,t=[1,1,1],o=[1,1,1]){let[n,s,a]=[Math.ceil(Wu(r.x.map(i=>e[i]))/(t[0]*o[0])),r.y?Math.ceil(Wu(r.y.map(i=>e[i]))/(t[1]*o[1])):1,r.z?Math.ceil(Wu(r.z.map(i=>e[i]))/(t[2]*o[2])):1];return[n,s,a]}function aS(r,e,t,o=!1){let n=[8,8,1],s=[4,4,1];return o||(r<=8&&(s[1]=1),e<=16&&t<=16&&(n[0]=4)),{workGroupSize:n,elementsPerThread:s}}function Hl(r,e,t=!1){if(t)return[8,8,1];let o=Wu(r.x.map(s=>e[s])),n=Wu(r.y.map(s=>e[s]));return o<=4?[4,16,1]:n<=4?[16,4,1]:[16,16,1]}function ql(r,e,t=!1){if(t)return[4,4,1];let o=Wu(r.x.map(s=>e[s])),n=Wu(r.y.map(s=>e[s]));return o<=4?[1,2,1]:n<=4?[2,1,1]:[2,2,1]}function fe(r){return{x:r.map((e,t)=>t)}}function iS(r){if(r==="float32"||r==="int32"||r==="bool"||r==="string")return 4;if(r==="complex64")return 8;throw new Error(`Unknown dtype ${r}`)}function uS(r,e){if(e==="float32")return new Float32Array(r);if(e==="int32")return new Int32Array(r);if(e==="bool"||e==="string")return Uint8Array.from(new Int32Array(r));throw new Error(`Unknown dtype ${e}`)}function Kl(){return(typeof window!="undefined"||typeof WorkerGlobalScope!="undefined")&&!!navigator.gpu}var 
Qo;
/** Two-way map between matmul program names and numeric codes (MatMulReduceProgram=0 ... MatMulMax=4). */
(function(r){r[r.MatMulReduceProgram=0]="MatMulReduceProgram",r[r.MatMulSplitKProgram=1]="MatMulSplitKProgram",r[r.MatMulSmallOutputSizeProgram=2]="MatMulSmallOutputSizeProgram",r[r.MatMulPackedProgram=3]="MatMulPackedProgram",r[r.MatMulMax=4]="MatMulMax"})(Qo||(Qo={}));
/* `xte` caches the numeric flag WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD read from the environment `P()`. */
var xte=P().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"),
/*
 * `yte(device, program)` returns a dispatch that respects
 * `device.limits.maxComputeWorkgroupsPerDimension`.
 * - If every entry of `program.dispatch` is within the limit it is returned as is.
 * - Otherwise only x may exceed the limit, and the layout must have no y or z (asserted).
 *   The x count is spread over [s, s, 1] with s = ceil(sqrt(x)); if that still exceeds the
 *   limit, over [s, s, s] with s = ceil(cbrt(x)), asserting that s fits.
 */
yte=(r,e)=>{let t=r.limits.maxComputeWorkgroupsPerDimension,o=e.dispatchLayout,n=e.dispatch;if(n.every(a=>a<=t))return n;x.assert(n[0]>t&&o.y===void 0&&o.z===void 0,()=>"Dispatch size exceeds WebGPU limits in Y or Z dimension.");let s=Math.ceil(Math.sqrt(n[0]));return s>t?(s=Math.ceil(Math.cbrt(n[0])),x.assert(s<=t,()=>"Total dispatch size exceeds WebGPU maximum."),[s,s,s]):[s,s,1]},
/* Backend class extending `Jr`; its definition continues past this line. */
Ai=class extends Jr{constructor(e,t){if(super(),this.commandQueueOwnedIds=new WeakSet,this.dispatchNumberInEncoder=0,this.disposed=!1,this.downloadWaitMs=0,this.tensorDataPendingDisposal=[],this.stagingPendingDisposal=[],this.uniformPendingDisposal=[],this.uploadWaitMs=0,!Kl())throw new Error("WebGPU is not supported on this device");this.pipelineCache={},this.device=e,this.queue=e.queue,this.currentCommandEncoder=null,this.currentComputePass=null,this.supportTimeQuery=e.features.has("timestamp-query"),this.adapterInfo=new Og(t),this.bufferManager=new Mg(this.device),this.textureManager=new Lg(this.device),this.tensorMap=new rn(this,cr()),this.supportTimeQuery&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:2})),P().getBool("WEBGPU_USE_PROFILE_TOOL")&&(this.dummyCanvas=document.createElement("canvas"),this.dummyCanvas.width=1,this.dummyCanvas.height=1,this.dummyContext=this.dummyCanvas.getContext("webgpu"),this.dummyContext.configure({device:e,format:"bgra8unorm"}),document.body.appendChild(this.dummyCanvas))}nextDataId(){return Ai.nextDataId++}floatPrecision(){return 32}defaultGpuBufferUsage(){return 
GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}disposeData(e,t=!1){if(this.tensorDataPendingDisposal.indexOf(e)>=0)return!1;if(!this.tensorMap.has(e))return!0;let o=this.tensorMap.get(e);if(this.decRef(e),!t&&o.refCount>0)return!1;if(this.commandQueueOwnedIds.has(e))return this.tensorDataPendingDisposal.push(e),!1;let{complexTens
if (isnan(a)) { return a; }
if (isnan(b)) { return b; }
`,tM=`
if (isNaN.r) {
resultTemp.r = valueForNaN;
}
if (isNaN.g) {
resultTemp.g = valueForNaN;
}
if (isNaN.b) {
resultTemp.b = valueForNaN;
}
if (isNaN.a) {
resultTemp.a = valueForNaN;
}
`,rM=`
let isNaN = isnanVec4(a) | isnanVec4(b);
${tM}
`,Cte="return a + b;",Ite="return areal * breal - aimag * bimag;",wte="return areal * bimag + aimag * breal;",Ste="return a / b;",vte="return a * b;",kte="return (a - b) * (a - b);",Tte="return a - b;",Nte="return f32(a == b);",_te="return vec4<f32>(a == b);",Ete="return f32(a > b);",$te="return vec4<f32>(a > b);",Rte="return f32(a >= b);",Ate="return vec4<f32>(a >= b);",Fte="return f32(a < b);",Dte="return vec4<f32>(a < b);",Pte="return f32(a <= b);",Ote="return vec4<f32>(a <= b);",Mte="return f32(f32(a) >= 1.0 && f32(b) >= 1.0);",Lte=`return (vec4<f32>(a >= vec4<f32>(1.0)) *
vec4<f32>(b >= vec4<f32>(1.0)));`,Bte=`
let s = sign(a) * sign(b);
let ia = i32(round(a));
let ib = i32(round(b));
return f32(idiv(ia, ib, s));
`,Vte=`
let ia = vec4<i32>(round(a));
let ib = vec4<i32>(round(b));
let cond = ib != vec4<i32>(0);
var resultTemp = vec4<i32>(0);
let s = sign(a) * sign(b);
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
if (cond[0]) {
resultTemp[0] = idiv(ia[0], ib[0], s[0]);
}
if (cond[1]) {
resultTemp[1] = idiv(ia[1], ib[1], s[1]);
}
if (cond[2]) {
resultTemp[2] = idiv(ia[2], ib[2], s[2]);
}
if (cond[3]) {
resultTemp[3] = idiv(ia[3], ib[3], s[3]);
}
return vec4<f32>(resultTemp);
`,zte=`
if (isnan(a) || isnan(b)) {
return 1.0;
}
return f32(a != b);
`,Wte=`
var resultTemp = vec4<f32>(a != b);
let valueForNaN = 1.0;
${rM}
return resultTemp;
`,Ute=`
if(a < 0.0 && floor(b) < b) {
return uniforms.NAN;
}
if (b == 0.0) {
return 1.0;
}
if (round(abs(b) % 2.0) != 1.0) {
return pow(abs(a), b);
}
return sign(a) * pow(abs(a), b);
`,Gte=`
let isModRound1Bool = vec4<i32>(round(abs(b) % vec4<f32>(2.0))) == vec4<i32>(1);
let isModRound1 = vec4<f32>(isModRound1Bool);
let multiplier = sign(a) * isModRound1 + (vec4<f32>(1.0) - isModRound1);
var resultTemp = multiplier * pow(abs(a), b);
// Ensure that a^0 = 1, including 0^0 = 1 as this correspond to TF and JS
let isExpZero = b == vec4<f32>(0.0);
if (isExpZero.r) {
resultTemp.r = 1.0;
}
if (isExpZero.g) {
resultTemp.g = 1.0;
}
if (isExpZero.b) {
resultTemp.b = 1.0;
}
if (isExpZero.a) {
resultTemp.a = 1.0;
}
let isNaN = (a < vec4<f32>(0.0)) & (floor(b) < b);
let valueForNaN = uniforms.NAN;
${tM}
return resultTemp;
`,Hte="if (a < 0.0) { return b * a; } return a;",qte=`
let aLessThanZero = vec4<f32>(a < vec4<f32>(0.0));
return (aLessThanZero * (b * a)) + ((vec4<f32>(1.0) - aLessThanZero) * a);
`;function cS(r,e,t="uniforms.NAN"){let o=e?rM:bte;return e?`
let valueForNaN = ${t};
var resultTemp = vec4<f32>(${r}(a, b));
`+o+`
return resultTemp;
`:o+`
return ${r}(a, b);
`}function Nc(r,e){switch(r){case ye.MUL:return vte;case ye.ADD:return Cte;case ye.ATAN2:return cS("atan2",e);case ye.SUB:return Tte;case ye.DIV:return Ste;case ye.EQUAL:return e?_te:Nte;case ye.GREATER:return e?$te:Ete;case ye.GREATER_EQUAL:return e?Ate:Rte;case ye.LESS:return e?Dte:Fte;case ye.LESS_EQUAL:return e?Ote:Pte;case ye.LOGICAL_AND:return e?Lte:Mte;case ye.NOT_EQUAL:return e?Wte:zte;case ye.SQUARED_DIFFERENCE:return kte;case ye.INT_DIV:return e?Vte:Bte;case ye.PRELU:return e?qte:Hte;case ye.MAX:return cS("max",e);case ye.MIN:return cS("min",e);case ye.POW:return e?Gte:Ute;case ye.COMPLEX_MULTIPLY_REAL:return Ite;case ye.COMPLEX_MULTIPLY_IMAG:return wte;default:throw new Error(`BinaryType ${r} is not implemented!`)}}var pe;(function(r){r[r.ABS=0]="ABS",r[r.CEIL=1]="CEIL",r[r.COS=2]="COS",r[r.COSH=3]="COSH",r[r.ELU=4]="ELU",r[r.EXP=5]="EXP",r[r.EXPM1=6]="EXPM1",r[r.FLOOR=7]="FLOOR",r[r.IS_NAN=8]="IS_NAN",r[r.LINEAR=9]="LINEAR",r[r.LOG=10]="LOG",r[r.LOGICAL_NOT=11]="LOGICAL_NOT",r[r.NEG=12]="NEG",r[r.RELU=13]="RELU",r[r.RELU6=14]="RELU6",r[r.LEAKYRELU=15]="LEAKYRELU",r[r.RECIPROCAL=16]="RECIPROCAL",r[r.RSQRT=17]="RSQRT",r[r.SIN=18]="SIN",r[r.SINH=19]="SINH",r[r.SIGMOID=20]="SIGMOID",r[r.SQRT=21]="SQRT",r[r.SQUARE=22]="SQUARE",r[r.TANH=23]="TANH",r[r.TO_INT=24]="TO_INT"})(pe||(pe={}));var Kte="return abs(a);",jte="return ceil(a);",Xte="return cos(a);",Yte=`
let e2x = exp(-a);
return (e2x + 1.0 / e2x) / 2.0;
`,Qte="return exp(a) - 1.0;",Zte="if (a >= 0.0) { return a; } return (exp(a) - 1.0);",Jte=`
var resFloat = exp(a) - vec4<f32>(1.0);
if (a.r >= 0.0) {
resFloat.r = a.r;
}
if (a.g >= 0.0) {
resFloat.g = a.g;
}
if (a.b >= 0.0) {
resFloat.b = a.b;
}
if (a.a >= 0.0) {
resFloat.a = a.a;
}
return resFloat;
`,ere="return exp(a);",tre="return floor(a);",rre="return f32(isnan(a));",ore="return a;",nre=`if (a < 0.0) { return uniforms.NAN; }
return log(a);`,sre="return f32(!(a >= 1.0));",are="return -a;",ire="if (a < 0.0) { return uniforms.alpha * a; } return a;",ure=`
let aLessThanZero = vec4<f32>(a < vec4<f32>(0.0));
return (aLessThanZero * (uniforms.alpha * a)) + ((vec4<f32>(1.0) - aLessThanZero) * a);
`,pre="return 1.0 / a;",cre="return select(a, 0.0, a < 0.0);",lre="return clamp(a, 0.0, 6.0);",mre="return clamp(a, vec4<f32>(0.0, 0.0, 0.0, 0.0), vec4<f32>(6.0, 6.0, 6.0, 6.0));",fre=`
return select(a, vec4<f32>(0.0), a < vec4<f32>(0.0));
`,dre="return 1.0/sqrt(a);",hre="return 1.0 / (1.0 + exp(-1.0 * a));",gre="return sin(a);",xre=`
let e2x = exp(a);
return (e2x - 1.0 / e2x) / 2.0;
`,yre="return sqrt(a);",bre="return a * a;",Cre=`
let e2x = exp(-2.0 * abs(a));
return sign(a) * (1.0 - e2x) / (1.0 + e2x);
`,Ire="return f32(i32((a)));";function za(r,e){switch(r){case pe.ABS:return Kte;case pe.COS:return Xte;case pe.COSH:return Yte;case pe.CEIL:return jte;case pe.ELU:return e?Jte:Zte;case pe.EXP:return ere;case pe.EXPM1:return Qte;case pe.FLOOR:return tre;case pe.IS_NAN:return rre;case pe.LINEAR:return ore;case pe.LOG:return nre;case pe.LOGICAL_NOT:return sre;case pe.NEG:return are;case pe.LEAKYRELU:return e?ure:ire;case pe.RECIPROCAL:return pre;case pe.RELU:return e?fre:cre;case pe.RELU6:return e?mre:lre;case pe.RSQRT:return dre;case pe.SIGMOID:return hre;case pe.SIN:return gre;case pe.SINH:return xre;case pe.SQRT:return yre;case pe.SQUARE:return bre;case pe.TANH:return Cre;case pe.TO_INT:return Ire;default:throw new Error(`BinaryType ${r} is not implemented!`)}}var vt=r=>{switch(r){case 1:return"f32";case 2:return"vec2<f32>";case 3:return"vec3<f32>";case 4:return"vec4<f32>";default:throw new Error(`${r}-component is not supported.`)}};function ur(r,e=!1,t=!1,o=3){if(r===null)return"";let n="";if(r==="linear")n=za(pe.LINEAR);else if(r==="relu")n=za(pe.RELU,t);else if(r==="elu")n=za(pe.ELU,t);else if(r==="relu6")n=za(pe.RELU6,t);else if(r==="prelu")n=Nc(ye.PRELU,t);else if(r==="sigmoid")n=za(pe.SIGMOID,t);else if(r==="leakyrelu")n=za(pe.LEAKYRELU,t);else throw new Error(`Activation ${r} has not been implemented for the WebGPU backend.`);let a=vt(t?4:1),i="";return e?i=`
fn activation(a : ${a}, coords : vec${o}<i32>) -> ${a} {
let b = getPreluActivationWeightsByOutputCoords(coords);
${n}
}`:i=`
fn activation(a : ${a}, coords : vec${o}<i32>) -> ${a} {
${n}
}`,i}
/**
 * WGSL snippet for the epilogue of a write: adds the bias at `coords` when `r` is truthy and
 * applies `activation(value, coords)` when `e` is truthy. Either line is empty otherwise.
 */
function Kr(r,e){return`
${r?"value = value + getBiasByOutputCoords(coords);":""}
${e?"value = activation(value, coords);":""}
`}
/**
 * Generates the WGSL functions `mm_readA` and `mm_readB` used by the matmul programs.
 *
 * - `r` / `e`: when truthy, A / B is always read from batch 0 instead of `batchIn`.
 * - `t`: transposeA (named so in the assert message) -- swaps row/col in the A read and
 *   selects which uniforms bound the read. It is only allowed with component size 1.
 * - `o`: swaps row/col in the B read.
 * - `n` and `a`: when both are truthy the bounds check around the A read is omitted.
 * - `s`: accepted but not referenced in this function.
 * - `i`: component size; the column index is multiplied by it and it selects the return type.
 * The B read is emitted without a bounds check.
 */
function lS(r,e,t,o,n=!1,s=!1,a=!1,i=1){x.assert(t&&i===1||!t,()=>`transposeA ${t} is not compatible with component size ${i}`);let p=`
let batch = ${r?"0":"batchIn"};
${t?"value = getA(batch, col, row);":"value = getA(batch, row, col);"}
`,u=o?"value = getB(batch, col, row);":"value = getB(batch, row, col);";return`
fn mm_readA(batchIn: i32, row: i32, colIn: i32) -> ${vt(i)} {
var value = ${vt(i)}(0.0);
let col = colIn * ${i};
${n&&a?p:`
${t?"if(row < uniforms.dimAOuter && col < uniforms.dimInner)":"if(row < uniforms.aShape[1] && col < uniforms.aShape[2])"}
{
${p}
}
`}
return value;
}
fn mm_readB(batchIn: i32, row: i32, colIn: i32) -> ${vt(i)} {
let col = colIn * ${i};
let batch = ${e?"0":"batchIn"};
var value = ${vt(i)}(0.0);
${u}
return value;
}
`}
/**
 * Generates the matmul read/write WGSL: the output of `lS(t,o,n,s,a,i,p,u)` followed by
 * `mm_write`.
 *
 * - `r` / `e`: forwarded to `Kr` (bias / activation epilogue).
 * - `a` and `i`: when both are truthy `mm_write` omits its output bounds check.
 * - `u`: component size; scales the column index and selects the value type.
 */
function jl(r,e,t,o,n,s,a=!1,i=!1,p=!1,u=1){return`
${lS(t,o,n,s,a,i,p,u)}
fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${vt(u)}) {
let col = colIn * ${u};
${a&&i?"":"if (row < uniforms.dimAOuter && col < uniforms.dimBOuter)"}
{
var value = valueIn;
let coords = vec3<i32>(batch, row, col);
${Kr(r,e)}
setOutputAtCoords(coords[0], coords[1], coords[2], value);
}
}
`}var wre=r=>r?`
mm_Asub[inputRow][inputCol] = mm_readA(batch,
kStart + inputRow,
globalRowStart / InnerElementSize + inputCol);
`:`
mm_Asub[inputRow][inputCol] = mm_readA(batch,
globalRow + innerRow,
kStart / InnerElementSize + inputCol);
`,
/**
 * Returns the WGSL accumulation snippet of the vectorized kernel (Uu).
 * @param r truthy selects the variant that caches A vectors indexed by `localRow` (Uu passes transposeA)
 * @param e inner element size; when it is 3 the fourth (BCached3 / ACached3 / .w) term is omitted
 */
Sre=(r,e)=>r?`
let ACached0 = mm_Asub[k * InnerElementSize][localRow];
let ACached1 = mm_Asub[k * InnerElementSize + 1][localRow];
let ACached2 = mm_Asub[k * InnerElementSize + 2][localRow];
${e===3?"":"let ACached3 = mm_Asub[k * InnerElementSize + 3][localRow];"}
for (var i = 0; i < RowPerThread; i = i + 1) {
acc[i] = BCached0 * ACached0[i] + acc[i];
acc[i] = BCached1 * ACached1[i] + acc[i];
acc[i] = BCached2 * ACached2[i] + acc[i];
${e===3?"":"acc[i] = BCached3 * ACached3[i] + acc[i];"}
}`:`
for (var i = 0; i < RowPerThread; i = i + 1) {
let ACached = mm_Asub[tileRow + i][k];
acc[i] = BCached0 * ACached.x + acc[i];
acc[i] = BCached1 * ACached.y + acc[i];
acc[i] = BCached2 * ACached.z + acc[i];
${e===3?"":"acc[i] = BCached3 * ACached.w + acc[i];"}
}`;
/**
 * Builds the WGSL entry point of the vectorized (vec4 accumulator) tiled matmul kernel.
 * @param r work per thread; r[0] becomes ColPerThread (asserted to be 4), r[1] becomes RowPerThread
 * @param e work group size [x, y, ...]
 * @param t transposeA; requires innerElementSize 4 and r[1] === 4 (asserted)
 * @param o tile size along the inner dimension (TileInner)
 * @param n when truthy the batch is fixed to 0 and globalId.z selects the inner-dimension slice of size `s`
 * @param s inner-dimension slice size, used only when `n` is truthy
 * @param a when truthy tileRow and globalRow are emitted as 0 (the packed program passes its isVectorA flag)
 * @returns WGSL source string; throws through x.assert when the tile sizes are incompatible
 */
function Uu(r,e,t=!1,o=32,n=!1,s=32,a=!1){let i=e[1]*r[1],p=e[0]*r[0],u=t?i:o,c=t?o:i,l=u/e[0],m=o/e[1];return x.assert((t&&l===4&&r[1]===4||!t&&(l===3||l===4))&&u%e[0]===0&&o%e[1]===0&&r[0]===4,()=>`If transposeA ${t} is true, innerElementSize ${l} and workPerThread[1] ${r[1]} must be 4.
Otherwise, innerElementSize ${l} must be 3 or 4.
tileAWidth ${u} must be divisible by workGroupSize[0]${e[0]}. tileInner ${o} must be divisible by workGroupSize[1] ${e[1]}. ColPerThread ${r[0]} must be 4.`),`
var<workgroup> mm_Asub : array<array<vec${l}<f32>, ${u/l}>, ${c}>;
var<workgroup> mm_Bsub : array<array<vec4<f32>, ${p/r[0]}>, ${o}>;
const RowPerThread = ${r[1]};
const ColPerThread = ${r[0]};
const InnerElementSize = ${l};
const TileInner = ${o};
@compute @workgroup_size(workGroupSizeX, workGroupSizeY, workGroupSizeZ)
fn _start(@builtin(local_invocation_id) LocalId : vec3<u32>,
@builtin(global_invocation_id) GlobalId : vec3<u32>,
@builtin(num_workgroups) NumWorkgroups: vec3<u32>,
@builtin(workgroup_id) workgroupId: vec3<u32>) {
localId = LocalId;
globalId = GlobalId;
numWorkgroups = NumWorkgroups;
let localRow = i32(localId.y);
let tileRow = ${a?"0":"localRow * RowPerThread"};
let tileCol = i32(localId.x);
let globalRow = ${a?"0":"i32(globalId.y) * RowPerThread"};
let globalCol = i32(globalId.x);
let batch = ${n?"0":"i32(globalId.z)"};
let globalRowStart = i32(workgroupId.y) * ${i};
let numTiles = ${n?`${Math.ceil(s/o)}`:"(uniforms.dimInner - 1) / TileInner + 1"};
var kStart = ${n?`i32(globalId.z) * ${s}`:"0"};
var acc: array<vec4<f32>, RowPerThread>;
// Loop over shared dimension.
let tileRowB = localRow * ${m};
for (var t = 0; t < numTiles; t = t + 1) {
// Load one tile of A into local memory.
for (var innerRow = 0; innerRow < RowPerThread; innerRow = innerRow + 1) {
let inputRow = tileRow + innerRow;
let inputCol = tileCol;
${wre(t)}
}
// Load one tile of B into local memory.
for (var innerRow = 0; innerRow < ${m}; innerRow = innerRow + 1) {
let inputRow = tileRowB + innerRow;
let inputCol = tileCol;
mm_Bsub[inputRow][inputCol] = mm_readB(batch, kStart + inputRow, globalCol);
}
kStart = kStart + TileInner;
workgroupBarrier();
// Compute acc values for a single thread.
for (var k = 0; k < TileInner / InnerElementSize; k = k + 1) {
let BCached0 = mm_Bsub[k * InnerElementSize][tileCol];
let BCached1 = mm_Bsub[k * InnerElementSize + 1][tileCol];
let BCached2 = mm_Bsub[k * InnerElementSize + 2][tileCol];
${l===3?"":"let BCached3 = mm_Bsub[k * InnerElementSize + 3][tileCol];"}
${Sre(t,l)}
}
workgroupBarrier();
}
for (var innerRow = 0; innerRow < RowPerThread; innerRow = innerRow + 1) {
mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]);
}
}`}
/**
 * Returns the WGSL statement that stores one mm_readA result into mm_Asub for the scalar kernel (Gu).
 * @param r truthy selects the variant reading (kStart + inputRow, globalRowStart + inputCol);
 *          otherwise (globalRowStart + inputRow, kStart + inputCol). Gu passes its transposeA flag here.
 */
var oM=r=>r?`
mm_Asub[inputRow][inputCol] = mm_readA(batch,
kStart + inputRow,
globalRowStart + inputCol);
`:`
mm_Asub[inputRow][inputCol] = mm_readA(batch,
globalRowStart + inputRow,
kStart + inputCol);
`,
/** Returns the WGSL statement that loads ACached from mm_Asub; `r` truthy swaps the two indices. */
vre=r=>r?"let ACached = mm_Asub[k][tileRow + innerRow];":"let ACached = mm_Asub[tileRow + innerRow][k];";
/**
 * Builds the WGSL entry point of the scalar (f32) tiled matmul kernel.
 * @param r work per thread; r[0] becomes ColPerThread, r[1] becomes RowPerThread
 * @param e work group size [x, y, ...]
 * @param t transposeA (swaps the dimensions of the A tile and the A load/read indices)
 * @param o tile size along the inner dimension (TileInner)
 * @param n when truthy the batch is fixed to 0 and globalId.z selects the inner-dimension slice of size `s`
 * @param s inner-dimension slice size, used only when `n` is truthy
 * @param a selects the first body variant, in which each thread steps through rows/columns with a stride
 *          of the work group size; otherwise each thread handles a contiguous RowPerThread x ColPerThread patch
 * @returns WGSL source string; throws through x.assert when the tile sizes are not divisible by the work group size
 */
function Gu(r,e,t=!1,o=32,n=!1,s=32,a=!1){let i=r[1]*e[1],p=r[0]*e[0],u=t?i:o,c=t?o:i;x.assert(c%e[1]===0&&u%e[0]===0&&o%e[1]===0,()=>`tileAHight ${c} must be divisible by workGroupSize[1]${e[1]}, tileAWidth ${u} must be divisible by workGroupSize[0]${e[0]}, tileInner ${o} must be divisible by workGroupSize[1]${e[1]}`);let l=c/e[1],m=u/e[0],f=o/e[1],d=a?`
let localRow = i32(localId.y);
let localCol = i32(localId.x);
let globalRowStart = i32(workgroupId.y) * ${i};
let globalColStart = i32(workgroupId.x) * ${p};
// Loop over shared dimension.
for (var t = 0; t < numTiles; t = t + 1) {
// Load one tile of A into local memory.
for (var inputRow = localRow; inputRow < ${c}; inputRow = inputRow + ${e[1]}) {
for (var inputCol = localCol; inputCol < ${u}; inputCol = inputCol + ${e[0]}) {
${oM(t)}
}
}
// Load one tile of B into local memory.
for (var inputRow = localRow; inputRow < ${o}; inputRow = inputRow + ${e[1]}) {
for (var inputCol = localCol; inputCol < ${p}; inputCol = inputCol + ${e[0]}) {
mm_Bsub[inputRow][inputCol] = mm_readB(batch,
kStart + inputRow,
globalColStart + inputCol);
}
}
kStart = kStart + TileInner;
workgroupBarrier();
// Compute acc values for a single thread.
var BCached : array<f32, ColPerThread>;
for (var k = 0; k < TileInner; k = k + 1) {
for (var inner = 0; inner < ColPerThread; inner = inner + 1) {
BCached[inner] = mm_Bsub[k][localCol + inner * ${e[0]}];
}
for (var innerRow = 0; innerRow < RowPerThread; innerRow = innerRow + 1) {
let ACached = ${t?`mm_Asub[k][localRow + innerRow * ${e[1]}];`:`mm_Asub[localRow + innerRow * ${e[1]}][k];`}
for (var innerCol = 0; innerCol < ColPerThread; innerCol = innerCol + 1) {
acc[innerRow][innerCol] = acc[innerRow][innerCol] +
ACached * BCached[innerCol];
}
}
}
workgroupBarrier();
}
for (var innerRow = 0; innerRow < RowPerThread; innerRow = innerRow + 1) {
let gRow = globalRowStart + localRow + innerRow * ${e[1]};
for (var innerCol = 0; innerCol < ColPerThread; innerCol = innerCol + 1) {
let gCol = globalColStart + localCol + innerCol * ${e[0]};
mm_write(batch, gRow, gCol, acc[innerRow][innerCol]);
}
}
`:`
let tileRow = i32(localId.y) * RowPerThread;
let tileCol = i32(localId.x) * ColPerThread;
let globalRow = i32(globalId.y) * RowPerThread;
let globalCol = i32(globalId.x) * ColPerThread;
let globalRowStart = i32(workgroupId.y) * ${i};
let tileRowA = i32(localId.y) * ${l};
let tileColA = i32(localId.x) * ${m};
let tileRowB = i32(localId.y) * ${f};
// Loop over shared dimension.
for (var t = 0; t < numTiles; t = t + 1) {
// Load one tile of A into local memory.
for (var innerRow = 0; innerRow < ${l}; innerRow = innerRow + 1) {
for (var innerCol = 0; innerCol < ${m}; innerCol = innerCol + 1) {
let inputRow = tileRowA + innerRow;
let inputCol = tileColA + innerCol;
${oM(t)}
}
}
// Load one tile of B into local memory.
for (var innerRow = 0; innerRow < ${f}; innerRow = innerRow + 1) {
for (var innerCol = 0; innerCol < ColPerThread; innerCol = innerCol + 1) {
let inputRow = tileRowB + innerRow;
let inputCol = tileCol + innerCol;
mm_Bsub[inputRow][inputCol] = mm_readB(batch,
kStart + inputRow,
globalCol + innerCol);
}
}
kStart = kStart + TileInner;
workgroupBarrier();
// Compute acc values for a single thread.
var BCached : array<f32, ColPerThread>;
for (var k = 0; k < TileInner; k = k + 1) {
for (var inner = 0; inner < ColPerThread; inner = inner + 1) {
BCached[inner] = mm_Bsub[k][tileCol + inner];
}
for (var innerRow = 0; innerRow < RowPerThread; innerRow = innerRow + 1) {
${vre(t)}
for (var innerCol = 0; innerCol < ColPerThread; innerCol = innerCol + 1) {
acc[innerRow][innerCol] = acc[innerRow][innerCol] + ACached * BCached[innerCol];
}
}
}
workgroupBarrier();
}
for (var innerRow = 0; innerRow < RowPerThread; innerRow = innerRow + 1) {
for (var innerCol = 0; innerCol < ColPerThread; innerCol = innerCol + 1) {
mm_write(batch, globalRow + innerRow, globalCol + innerCol,
acc[innerRow][innerCol]);
}
}
`;return`
var<workgroup> mm_Asub : array<array<f32, ${u}>, ${c}>;
var<workgroup> mm_Bsub : array<array<f32, ${p}>, ${o}>;
const RowPerThread = ${r[1]};
const ColPerThread = ${r[0]};
const TileInner = ${o};
@compute @workgroup_size(workGroupSizeX, workGroupSizeY, workGroupSizeZ)
fn _start(@builtin(local_invocation_id) LocalId : vec3<u32>,
@builtin(global_invocation_id) GlobalId : vec3<u32>,
@builtin(num_workgroups) NumWorkgroups: vec3<u32>,
@builtin(workgroup_id) workgroupId: vec3<u32>) {
localId = LocalId;
globalId = GlobalId;
numWorkgroups = NumWorkgroups;
let batch = ${n?"0":"i32(globalId.z)"};
let numTiles = ${n?`${Math.ceil(s/o)}`:"(uniforms.dimInner - 1) / TileInner + 1"};
var kStart = ${n?`i32(globalId.z) * ${s}`:"0"};
var acc : array<array<f32, ColPerThread>, RowPerThread>;
// Without this initialization strange values show up in acc.
for (var innerRow = 0; innerRow < RowPerThread; innerRow = innerRow + 1) {
for (var innerCol = 0; innerCol < ColPerThread; innerCol = innerCol + 1) {
acc[innerRow][innerCol] = 0.0;
}
}
${d}
}
`}var kre=r=>r?`
mm_readA(batch, colA, globalRow),
mm_readA(batch, colA + 1, globalRow),
mm_readA(batch, colA + 2, globalRow),
mm_readA(batch, colA + 3, globalRow)
`:`
mm_readA(batch, globalRow, colA),
mm_readA(batch, globalRow, colA + 1),
mm_readA(batch, globalRow, colA + 2),
mm_readA(batch, globalRow, colA + 3)
`;
/**
 * Builds the WGSL main function of the matmul variant that the packed program uses when its
 * isVectorA flag is set and isVec4 is not.
 * @param r work group size; asserted to be linear (r[1] === 1 and r[2] === 1). TileSize is r[0] * 4.
 * @param e forwarded to kre to select the transposed A read
 * @returns WGSL source string; each invocation accumulates one scalar `acc` and writes it with mm_write
 */
function Tre(r,e=!1){return x.assert(r[1]===1&&r[2]===1,()=>`A linear work group size is required. But got ${r}.`),`
const TileSize = ${r[0]*4};
var<workgroup> mm_Asub : array<vec4<f32>, ${r[0]}>;
${ue()} {
let tileCol = i32(localId.x);
let globalCol = i32(globalId.x);
let globalRow = i32(globalId.y);
let numTiles = (uniforms.dimInner - 1) / TileSize + 1;
let batch = i32(globalId.z);
// Without this initialization strange values show up in acc.
var acc = 0.0;
// Loop over shared dimension.
for (var t = 0; t < numTiles; t = t + 1) {
// Load one tile of A into local memory.
let colA = t * TileSize + tileCol * 4;
mm_Asub[tileCol] = vec4<f32>(${kre(e)});
workgroupBarrier();
// Compute acc values for a single thread.
for (var k = 0; k < TileSize / 4; k = k + 1) {
let rowB = t * TileSize + k * 4;
let BCached = vec4<f32>(mm_readB(batch, rowB, globalCol),
mm_readB(batch, rowB + 1, globalCol),
mm_readB(batch, rowB + 2, globalCol),
mm_readB(batch, rowB + 3, globalCol));
let ACached = mm_Asub[k];
acc = acc + dot(ACached, BCached);
}
workgroupBarrier();
}
mm_write(batch, globalRow, globalCol, acc);
}
`}
/**
 * WebGPU program description for the "matMulPacked" shader.
 * Constructor arguments: e = 3D shape of A, t = output shape, o / n = batchAEqualOne / batchBEqualOne,
 * s / a = transposeA / transposeB, i = bias (null when absent), p = activation,
 * u = prelu activation weights (null when absent), c = sequentialAccessByThreads.
 * isVec4 selects the vectorized kernel (Uu); otherwise isVectorA (output has a single row and A is not
 * transposed) selects Tre; otherwise the scalar kernel (Gu) is used.
 */
var Bg=class{constructor(e,t,o,n,s=!1,a=!1,i=null,p=null,u=null,c=!1){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=t,this.dispatchLayout={x:[2],y:[1],z:[0]};let l=s?e[1]:e[2];if(this.isVec4=(l%4===0&&!s||t[1]%4===0&&s)&&t[2]%4===0&&!a,this.isVectorA=t[1]===1&&!s,!this.isVec4&&this.isVectorA)this.elementsPerThread=[1,1,1],this.workGroupSize=[32,1,1];else{let d=aS(t[1],l,t[2],s);this.workGroupSize=d.workGroupSize,this.elementsPerThread=d.elementsPerThread}this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,this.elementsPerThread);let m=i!=null,f=u!=null;m&&this.variableNames.push("bias"),f&&this.variableNames.push("preluActivationWeights"),this.sequentialAccessByThreads=c,this.transposeA=s,this.transposeB=a,this.addBias=m,this.activation=p,this.hasPreluActivationWeights=f,this.batchAEqualOne=o,this.batchBEqualOne=n,[this.fitAOuter,this.fitBOuter,this.fitInner]=this.getShapeFit(t[1],t[2],l),this.shaderKey=`matMulPacked_${this.elementsPerThread}_${s}_${a}_${this.activation}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.isVectorA}_${this.batchAEqualOne}_${this.batchBEqualOne}_${this.sequentialAccessByThreads}`}
/* Sets this.tileInner as a side effect and returns whether the outer-A, outer-B and inner sizes are exact multiples of their tile sizes. */
getShapeFit(e,t,o){let n=this.workGroupSize[1]*this.elementsPerThread[1],s=this.workGroupSize[0]*this.elementsPerThread[0];!this.isVec4&&this.isVectorA?this.tileInner=this.workGroupSize[0]*4:this.tileInner=s;let a=e%n===0,i=t%s===0,p=o%this.tileInner===0;return[a,i,p]}
/* Assembles the activation helper (ur), the read/write helpers (jl, always called with transposeA = false) and the selected kernel body. */
getUserCode(){return`
${ur(this.activation,this.hasPreluActivationWeights,this.isVec4)}
${jl(this.addBias,this.activation,this.batchAEqualOne,this.batchBEqualOne,!1,this.transposeB,this.fitAOuter,this.fitBOuter,this.fitInner,this.isVec4?4:1)}
${this.isVec4?Uu(this.elementsPerThread,this.workGroupSize,this.transposeA,this.tileInner,!1,null,this.isVectorA):this.isVectorA?Tre(this.workGroupSize,this.transposeA):Gu(this.elementsPerThread,this.workGroupSize,this.transposeA,this.tileInner,!1,null,this.sequentialAccessByThreads)}
`}};
/**
 * Builds the WGSL main function of the "matMulReduce" shader: every invocation of a work group
 * accumulates a strided partial dot product for one output element, the partial sums are combined
 * pairwise through the workgroup array `sumValues`, and invocation 0 adds the last two entries and
 * writes the result with mm_write.
 */
function Nre(){return`
var<workgroup> sumValues : array<f32, workGroupSizeX>;
${ue()} {
let coords = getOutputCoords();
let batch = coords[0];
let row = coords[1];
let col = coords[2];
var sum = 0.0;
let Length = uniforms.dimInner;
for (var k = i32(localId.x); k < Length; k = k + i32(workGroupSizeX)) {
let dataA = mm_readA(batch, row, k);
let dataB = mm_readB(batch, k, col);
sum = sum + dataA * dataB;
}
sumValues[localId.x] = sum;
workgroupBarrier();
for(var currentSize = workGroupSizeX / 2u; currentSize > 1u;
currentSize = currentSize / 2u) {
if (localId.x < currentSize)
{
sumValues[localId.x] = sumValues[localId.x] + sumValues[localId.x + currentSize];
}
workgroupBarrier();
}
if (localId.x == 0u) {
sum = sumValues[0] + sumValues[1];
mm_write(batch, row, col, sum);
}
}
`}
/**
 * WebGPU program description for the "matMulReduce" shader (work group size [256,1,1]).
 * Constructor arguments: e = output shape, t / o = batchAEqualOne / batchBEqualOne,
 * n / s = transposeA / transposeB, a = bias (null when absent), i = activation,
 * p = prelu activation weights (null when absent).
 */
var Vg=class{constructor(e,t,o,n=!1,s=!1,a=null,i=null,p=null){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workGroupSize=[256,1,1],this.outputShape=e,this.dispatchLayout={x:[],y:[1,2],z:[0]},this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize);let u=a!=null,c=p!=null;u&&this.variableNames.push("bias"),c&&this.variableNames.push("preluActivationWeights"),this.transposeA=n,this.transposeB=s,this.addBias=u,this.activation=i,this.hasPreluActivationWeights=c,this.batchAEqualOne=t,this.batchBEqualOne=o,this.shaderKey=`matMulReduce_${this.activation}_${n}_${s}_${this.batchAEqualOne}_${this.batchBEqualOne}`}getUserCode(){return`
${ur(this.activation,this.hasPreluActivationWeights)}
${jl(this.addBias,this.activation,this.batchAEqualOne,this.batchBEqualOne,this.transposeA,this.transposeB)}
${Nre()}
`}};
/**
 * Builds the WGSL main function of the "matMulSmallOutputSize" shader.
 * @param r work group size; r[1] rows and r[0] columns of the tile, the inner tile size is the larger of the two
 * @returns WGSL source string. Each invocation pre-reads one A value and two B values for the next
 *          tile while the current tile is consumed from workgroup memory.
 * NOTE(review): the `2 * tileRow` indexing of mm_Bsub assumes the inner tile size equals 2 * r[1]
 * (true for the [16,8,1] work group size used by zg) - confirm before using other sizes.
 */
function _re(r){let e=r[1],t=r[0],o=e>t?e:t;return`
var<workgroup> mm_Asub : array<array<f32, ${o}>, ${e}>;
var<workgroup> mm_Bsub : array<array<f32, ${t}>, ${o}>;
// If the output size is small for matrix multiplication, avoid to use vec4
// and handle some elements per thread to optimally utilize the ALU.
// Read data from global memory to registers firstly, then store them into
// shared memory, so it is instruction-Level parallelism for arithmetic
// operations and others handle IO operations between barrier api, makes ALU
// and load/store units work simultaneously, could improves the performance.
${ue()} {
let tileRow = i32(localId.y);
let tileCol = i32(localId.x);
let globalRow = i32(globalId.y);
let globalCol = i32(globalId.x);
let batch = i32(globalId.z);
// uniforms.dimInner should be greater than 0.
let numTiles = (uniforms.dimInner - 1) / ${o} + 1;
var acc = 0.0;
var globalColA = tileCol;
var globalRowB = 0;
var regA = mm_readA(batch, globalRow, globalColA);
var regB0 = mm_readB(batch, globalRowB + 2 * tileRow, globalCol);
var regB1 = mm_readB(batch, globalRowB + 2 * tileRow + 1, globalCol);
globalColA = globalColA + ${o};
globalRowB = globalRowB + ${o};
for (var t = 0; t < numTiles; t = t + 1) {
mm_Asub[tileRow][tileCol] = regA;
mm_Bsub[2 * tileRow][tileCol] = regB0;
mm_Bsub[2 * tileRow + 1][tileCol] = regB1;
workgroupBarrier();
regA = mm_readA(batch, globalRow, globalColA);
regB0 = mm_readB(batch, globalRowB + 2 * tileRow, globalCol);
regB1 = mm_readB(batch, globalRowB + 2 * tileRow + 1, globalCol);
globalColA = globalColA + ${o};
globalRowB = globalRowB + ${o};
for (var k = 0; k < ${o}; k = k + 1) {
acc = acc + mm_Asub[tileRow][k] * mm_Bsub[k][tileCol];
}
workgroupBarrier();
}
mm_write(batch, globalRow, globalCol, acc);
}
`}
/**
 * WebGPU program description for the "matMulSmallOutputSize" shader (work group size [16,8,1]).
 * Constructor arguments: e / t = 3D shapes of A / B (only their batch sizes are read),
 * o = output shape, n / s = transposeA / transposeB, a = bias (null when absent), i = activation,
 * p = prelu activation weights (null when absent).
 * The dispatch is computed directly as ceil(cols / 16) x ceil(rows / 8) x batch.
 */
var zg=class{constructor(e,t,o,n=!1,s=!1,a=null,i=null,p=null){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workGroupSize=[16,8,1],this.outputShape=o,this.dispatchLayout={x:[2],y:[1],z:[0]},this.dispatch=[Math.ceil(o[2]/this.workGroupSize[0]),Math.ceil(o[1]/this.workGroupSize[1]),o[0]];let u=a!=null;u&&this.variableNames.push("bias");let c=p!=null;c&&this.variableNames.push("preluActivationWeights"),this.transposeA=n,this.transposeB=s,this.addBias=u,this.activation=i,this.hasPreluActivationWeights=c,this.batchAEqualOne=e[0]===1,this.batchBEqualOne=t[0]===1,this.shaderKey=`matMulSmallOutputSize_${this.activation}_${n}_${s}_${this.batchAEqualOne}_${this.batchBEqualOne}`}getUserCode(){return`
${ur(this.activation,this.hasPreluActivationWeights)}
${jl(this.addBias,this.activation,this.batchAEqualOne,this.batchBEqualOne,this.transposeA,this.transposeB)}
${_re(this.workGroupSize)}
`}};
/**
 * WebGPU program description for the "matMulSplitK" shader: the inner dimension is split into slices
 * of `splitedDimInner` (128) elements and each slice's partial result is added to the output with a
 * compare-exchange loop on the atomic `result` buffer.
 * Constructor arguments: e = output shape (batch must be 1, asserted), t = inner dimension size,
 * o / n = batchAEqualOne / batchBEqualOne, s / a = transposeA / transposeB.
 * The emitted mm_write accumulates into the existing output contents, so the caller must supply a
 * zero-initialised output (see the comment inside the shader text).
 */
var Wg=class{constructor(e,t,o,n,s=!1,a=!1){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workGroupSize=[8,8,1],this.atomic=!0,this.isVec4=!1,this.splitedDimInner=128,x.assert(e[0]===1,()=>"MatMulSplitKProgram only supports batch = 1."),this.outputShape=e,this.dispatchLayout={x:[2],y:[1],z:[0,3]},this.isVec4=(s&&this.outputShape[1]%4===0||!s&&t%4===0)&&this.outputShape[2]%4===0,this.elementsPerThread=[4,4,this.splitedDimInner],this.isVec4||(this.outputShape[1]<16&&(this.elementsPerThread[1]=1),this.outputShape[2]<16&&(this.elementsPerThread[0]=1)),this.dispatch=ae(this.dispatchLayout,[this.outputShape[0],this.outputShape[1],this.outputShape[2],t],this.workGroupSize,this.elementsPerThread),this.transposeA=s,this.transposeB=a,this.batchAEqualOne=o,this.batchBEqualOne=n,this.shaderKey=`matMulSplitK_${s}_${a}_${o}_${n}_${this.elementsPerThread}_${this.isVec4}`}
/* `e(n)` below emits the atomic float-add loop for n components; `t` is the component size (4 when isVec4, else 1). */
getUserCode(){let e=n=>`
for (var i = 0; i < ${n}; i = i + 1)
{
var oldValue = atomicLoad(&(result[flatIndex + i]));
var exchanged = false;
for (; !exchanged;) {
let newValueF32 = bitcast<f32>(oldValue) + ${n>1?"value[i]":"value"};
let newValue = bitcast<i32>(newValueF32);
let res = atomicCompareExchangeWeak(&(result[flatIndex + i]), oldValue, newValue);
oldValue = res.old_value;
exchanged = res.exchanged;
}
}
`,t=this.isVec4?4:1;return`
${lS(this.batchAEqualOne,this.batchBEqualOne,!1,this.transposeB,!1,!1,!1,t)}
fn mm_write(batch: i32, row : i32, colIn : i32, value : ${vt(t)}) {
let col = colIn * ${t};
if (row < uniforms.dimAOuter && col < uniforms.dimBOuter) {
let coords = vec3<i32>(batch, row, col);
let flatIndex = getOutputIndexFromCoords(coords);
// The problem is that we should initialize output to zero before using.
// Otherwise, the original value will be added to the result.
${e(t)}
}
}
${this.isVec4?Uu(this.elementsPerThread,this.workGroupSize,this.transposeA,32,!0,this.splitedDimInner):Gu(this.elementsPerThread,this.workGroupSize,this.transposeA,32,!0,this.splitedDimInner)}
`}},
/**
 * WebGPU program description for the "biasActivation" shader, an element-wise pass over input `x`.
 * Constructor arguments: e = output shape, t = bias (null when absent), o = activation,
 * n = prelu activation weights (null when absent).
 * NOTE(review): Kr(addBias, activation) presumably emits the bias add and activation call - confirm against Kr.
 */
Ug=class{constructor(e,t=null,o=null,n=null){this.uniforms="",this.variableNames=["x"],this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.addBias=t!=null,this.hasPreluActivationWeights=n!=null,this.activation=o,this.addBias&&this.variableNames.push("bias"),this.hasPreluActivationWeights&&this.variableNames.push("preluActivationWeights"),this.shaderKey=`biasActivation_${o}`}getUserCode(){return`
${ur(this.activation,this.hasPreluActivationWeights)}
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
var value = getXByOutputIndex(index);
${Kr(this.addBias,this.activation)}
setOutputAtIndex(index, value);
}
}
`}};
/**
 * WebGPU program description for the "fill" shader: writes the uniform `value` (f32) to every output element.
 * Constructor argument: e = output shape. The program has no input variables.
 */
var Gg=class{constructor(e){this.variableNames=[],this.outputShape=[],this.uniforms="value : f32,",this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey="fill"}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
setOutputAtIndex(index, uniforms.value);
}
}
`}};
/**
 * Fill kernel. Reads `shape`, `value` and optional `dtype` from attrs (dtype is inferred from the value
 * when missing). String tensors are filled on the host; every other dtype runs the Gg program with
 * the value passed as a float32 uniform.
 */
function $o(r){let{backend:e,attrs:t}=r,{shape:o,value:n}=t,{dtype:s}=t;if(s=s||x.inferDtype(n),s==="string"){let a=x.getArrayFromDType(s,x.sizeFromShape(o));return a.fill(n),e.makeTensorInfo(o,s,a)}else{let a=new Gg(o),i=[{type:"float32",data:[n]}];return e.runWebGPUProgram(a,[],s,i)}}var nM={kernelName:ys,backendName:"webgpu",kernelFunc:$o};
/**
 * Reshape kernel. Resolves the requested shape against the element count of `x`, asserts that the
 * element counts match, increments the reference count of the underlying data and returns a tensor
 * info that shares x.dataId with the new shape (no data is copied here).
 */
function xe(r){let{inputs:e,attrs:t}=r,{x:o}=e,{shape:n}=t,s=x.sizeFromShape(o.shape),a=x.inferFromImplicitShape(n,s),i=x.sizeFromShape(a);return x.assert(s===i,()=>`The new shape (${a}) has ${i} elements and the old shape (${o.shape}) has ${s} elements. The new shape and old shape must have the same number of elements.`),r.backend.incRef(o.dataId),{dataId:o.dataId,shape:a,dtype:o.dtype}}var sM={kernelName:Ss,backendName:"webgpu",kernelFunc:xe};
/**
 * Batched matrix multiplication with optional bias and activation.
 * Both inputs are reshaped to rank 3, then a program is chosen: the environment flag
 * WEBGPU_MATMUL_PROGRAM_TYPE is used when it is non-negative, otherwise a size heuristic picks between
 * the reduce, split-K, small-output-size and packed programs. The split-K path pre-fills the output
 * with zeros (via $o) and, when a bias or activation is requested, applies it in a second pass (Ug).
 * Intermediate tensors are disposed before returning the result reshaped to the broadcast output shape.
 * Throws (through x.assert) when the inner dimensions differ, and an Error for an unknown program type.
 */
function _c({a:r,b:e,transposeA:t,transposeB:o,backend:n,bias:s=null,preluActivationWeights:a=null,leakyreluAlpha:i=0,activation:p=null}){let u=r.shape.length,c=e.shape.length,l=t?r.shape[u-2]:r.shape[u-1],m=o?e.shape[c-1]:e.shape[c-2],f=t?r.shape[u-1]:r.shape[u-2],d=o?e.shape[c-2]:e.shape[c-1],h=r.shape.slice(0,-2),g=e.shape.slice(0,-2),y=x.sizeFromShape(h),b=x.sizeFromShape(g),w=br.assertAndGetBroadcastShape(r.shape.slice(0,-2),e.shape.slice(0,-2)).concat([f,d]);x.assert(l===m,()=>`Error in matMul: inner shapes (${l}) and (${m}) of Tensors with shapes ${r.shape} and ${e.shape} and transposeA=${t} and transposeB=${o} must match.`);let k=t?[y,l,f]:[y,f,l],_=o?[b,d,m]:[b,m,d],E=xe({inputs:{x:r},backend:n,attrs:{shape:k}}),R=xe({inputs:{x:e},backend:n,attrs:{shape:_}}),A=[E,R],D=Math.max(y,b),O=y===1,M=b===1,L=[E,R],W=[{type:"int32",data:[f]},{type:"int32",data:[d]},{type:"int32",data:[l]}],V,G,q=[D,f,d],H=P().get("WEBGPU_MATMUL_PROGRAM_TYPE");switch(H<0&&(f*d<=128?H=Qo.MatMulReduceProgram:D===1&&f<=128&&d<=48&&m>=2e3?H=Qo.MatMulSplitKProgram:f<=16&&(d<=512||m>=2*d)||d<=16&&(f<=512||l>=2*f)?H=Qo.MatMulSmallOutputSizeProgram:H=Qo.MatMulPackedProgram),H){case 
Qo.MatMulReduceProgram:V=new Vg(q,O,M,t,o,s,p,a);break;case Qo.MatMulSplitKProgram:{if(G=$o({backend:n,attrs:{shape:q,value:0,dtype:r.dtype}}),V=new Wg(q,m,O,M,t,o),s||p){G=n.runWebGPUProgram(V,L,r.dtype,W,G);let Z=new Ug(G.shape,s,p,a),ee=null,X=[G];s&&X.push(s),a&&X.push(a),p==="leakyrelu"&&(ee=[{type:"float32",data:[i]}],Z.uniforms+=" alpha : f32,");let Q=n.runWebGPUProgram(Z,X,G.dtype,ee);A.push(G);let se=xe({inputs:{x:Q},backend:n,attrs:{shape:w}});A.push(Q);for(let ie of A)n.disposeData(ie.dataId);return se}break}case Qo.MatMulSmallOutputSizeProgram:V=new zg(k,_,q,t,o,s,p,a);break;case Qo.MatMulPackedProgram:let Y=n.adapterInfo.isIntel();V=new Bg(k,q,O,M,t,o,s,p,a,Y);break;default:throw new Error(`Unsupported MatMulProgramType ${H}.`)}s&&L.push(s),a&&L.push(a),p==="leakyrelu"&&(W.push({type:"float32",data:[i]}),V.uniforms+=" alpha : f32,"),G=n.runWebGPUProgram(V,L,r.dtype,W,G);let j=xe({inputs:{x:G},backend:n,attrs:{shape:w}});A.push(G);for(let Y of A)n.disposeData(Y.dataId);return j}
/** Kernel entry point that unpacks inputs/attrs and delegates to _c. */
function Ere(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=e,{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o;return _c({a:n,b:s,transposeA:p,transposeB:u,backend:t,bias:a,preluActivationWeights:i,leakyreluAlpha:l,activation:c})}var aM={kernelName:Fo,backendName:"webgpu",kernelFunc:Ere};
/**
 * WebGPU program description for the "binaryOpComplex" shader. Takes the real and imaginary parts of
 * both operands as four separate inputs and produces one f32 output per element.
 * Constructor arguments: e = operation (passed to Nc), t / o = operand shapes (broadcast together).
 */
var Xl=class{constructor(e,t,o){this.variableNames=["AReal","AImag","BReal","BImag"],this.workGroupSize=[128,1,1],this.size=!0,this.outputShape=I.assertAndGetBroadcastShape(t,o),this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey=`binaryOpComplex_${e}`,this.op=e}getUserCode(){return`
fn binaryOpComplex(
areal : f32, aimag : f32, breal : f32, bimag : f32) -> f32 {
${Nc(this.op,!1)}
}
${ue("index")} {
if(index < uniforms.size) {
let areal = getARealByOutputIndex(index);
let aimag = getAImagByOutputIndex(index);
let breal = getBRealByOutputIndex(index);
let bimag = getBImagByOutputIndex(index);
setOutputAtIndex(index, binaryOpComplex(areal, aimag, breal, bimag));
}
}
`}};var Hu=class{constructor(e,t,o){this.size=!0,this.variableNames=["A","B"],this.outputShape=I.assertAndGetBroadcastShape(t,o),this.dispatchLayout=fe(this.outputShape),this.op=e,this.useSharedMemoryWithA=t.length<=1&&o.length>1&&t[0]<128,this.useSharedMemoryWithB=o.length<=1&&t.length>1&&o[0]<128,this.useSharedMemoryWithA||this.useSharedMemoryWithB?(this.isVec4=!1,this.lastDimensionSize=this.useSharedMemoryWithB?o[0]:t[0],this.shaderKey=`binary_${this.type}_${e}_${this.lastDimensionSize}_${this.useSharedMemoryWithB}`,this.type="shared",this.workGroupSize=[256,1,1],this.workPerThread=1):(x.arraysEqual(t,o)&&x.sizeFromShape(t)%4===0?(this.isVec4=!0,this.type="vec4",this.workPerThread=4):(this.isVec4=!1,this.type="plain",this.workPerThread=1),this.shaderKey=`binary_${this.type}_${e}`,this.workGroupSize=[128,1,1]),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[this.workPerThread,1,1])}getUserCode(){let e,t=this.isVec4?"vec4<f32>":"f32",o=`
fn binaryOperation(a : ${t}, b : ${t}) -> ${t} {
${Nc(this.op,this.isVec4)}
};
`;if(this.type==="shared"){let n=this.lastDimensionSize>1?`coords[${this.outputShape.length-1}]`:"0",s=this.useSharedMemoryWithB?`let a = getAByOutputIndex(index);
let b = sharedBuf[${n}];`:`let a = sharedBuf[${n}];
let b = getBByOutputIndex(index);`;e=`
${o}
var<workgroup> sharedBuf : array<f32, ${this.lastDimensionSize}>;
${ue("index")} {
// Fill in the shared memory buffer.
let localIndex = i32(localId.x);
if(localIndex < ${this.lastDimensionSize}) {
sharedBuf[localIndex] = f32(${this.useSharedMemoryWithB?"B":"A"}[localIndex]);
}
workgroupBarrier();
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
${s}
setOutputAtIndex(index, binaryOperation(a, b));
}
}
`}else e=`
${o}
${ue("index")} {
if (index < uniforms.size) {
let a = getAByOutputIndex(index);
let b = getBByOutputIndex(index);
setOutputAtIndex(index, binaryOperation(a, b));
}
}
`;return e}};
/**
 * Identity kernel: increments the reference count of x's data and returns a tensor info that shares
 * the same dataId, shape and dtype (no data is copied here).
 */
function Lt(r){let{inputs:e}=r,{x:t}=e;return r.backend.incRef(t.dataId),{dataId:t.dataId,shape:t.shape,dtype:t.dtype}}var iM={kernelName:uo,backendName:"webgpu",kernelFunc:Lt};
/**
 * Complex kernel: creates a "complex64" tensor info with the shape of `real` and attaches identity
 * references (via Lt) of the `real` and `imag` inputs as its complexTensorInfos.
 */
function ls(r){let{inputs:e,backend:t}=r,{real:o,imag:n}=e,s=t.makeTensorInfo(o.shape,"complex64"),a=t.tensorMap.get(s.dataId),i=Lt({inputs:{x:o},backend:t}),p=Lt({inputs:{x:n},backend:t});return a.complexTensorInfos={real:i,imag:p},s}var uM={kernelName:aa,backendName:"webgpu",kernelFunc:ls};
/**
 * WebGPU program description for element-wise unary operations on input `A`.
 * Constructor arguments: e = output shape, t = operation (passed to za and used in the shader key).
 */
var Zo=class{constructor(e,t){this.variableNames=["A"],this.size=!0;let o=128;this.workGroupSize=[o,1,1],this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.op=t,this.shaderKey=`unary_${t}`}getUserCode(){return`
fn unaryOperation(a : f32) -> f32 {
${za(this.op,!1)}
}
${ue("index")} {
if (index < uniforms.size) {
let a = getAByOutputIndex(index);
setOutputAtIndex(index, unaryOperation(a));
}
}
`}};
/**
 * Creates a unary kernel function.
 * @param opType operation handed to the Zo program
 * @param cpuKernelImpl optional host implementation, used when backend.shouldExecuteOnCPU([x]) is true
 * @param dtype optional output dtype; defaults to the dtype of x
 */
function Ge({opType:r,cpuKernelImpl:e,dtype:t}){return({inputs:o,backend:n})=>{let{x:s}=o,a=n,i=t||s.dtype;if(a.shouldExecuteOnCPU([s])&&e!=null){let u=a.tensorMap.get(s.dataId),c=e(u.values,i);return a.makeTensorInfo(s.shape,i,c)}let p=new Zo(s.shape,r);return a.runWebGPUProgram(p,[s],i)}}
/**
 * Creates a binary kernel function.
 * @param opType operation handed to the Hu program
 * @param cpuKernelImpl optional host implementation, used for string inputs or when
 *        backend.shouldExecuteOnCPU([a, b]) is true
 * @param supportsComplex when true and a is complex64, real and imaginary parts are processed
 *        separately (multiplication uses the two Xl programs) and recombined with ls
 * @param dtype optional output dtype; defaults to ct(a.dtype, b.dtype)
 * NOTE(review): on the host path B's values are decoded based on a.dtype rather than b.dtype -
 * confirm that string operands always come in pairs.
 */
function it({opType:r,cpuKernelImpl:e,supportsComplex:t=!1,dtype:o}){return({inputs:n,backend:s})=>{let{a,b:i}=n,p=s;if(t&&a.dtype==="complex64"){let l=p.tensorMap.get(a.dataId),m=p.tensorMap.get(i.dataId),f,d;if(r!==ye.MUL)[f,d]=[[l.complexTensorInfos.real,m.complexTensorInfos.real],[l.complexTensorInfos.imag,m.complexTensorInfos.imag]].map(g=>{let[y,b]=g,C={dataId:y.dataId,dtype:y.dtype,shape:a.shape},w={dataId:b.dataId,dtype:b.dtype,shape:i.shape},k=new Hu(r,a.shape,i.shape);return p.runWebGPUProgram(k,[C,w],ct(y.dtype,b.dtype))});else{let g=new Xl(ye.COMPLEX_MULTIPLY_REAL,a.shape,i.shape),y=new Xl(ye.COMPLEX_MULTIPLY_IMAG,a.shape,i.shape),b=[{dataId:l.complexTensorInfos.real.dataId,dtype:l.complexTensorInfos.real.dtype,shape:a.shape},{dataId:l.complexTensorInfos.imag.dataId,dtype:l.complexTensorInfos.imag.dtype,shape:a.shape},{dataId:m.complexTensorInfos.real.dataId,dtype:m.complexTensorInfos.real.dtype,shape:i.shape},{dataId:m.complexTensorInfos.imag.dataId,dtype:m.complexTensorInfos.imag.dtype,shape:i.shape}];f=p.runWebGPUProgram(g,b,"float32"),d=p.runWebGPUProgram(y,b,"float32")}let h=ls({inputs:{real:f,imag:d},backend:p});return p.disposeData(f.dataId),p.disposeData(d.dataId),h}let u=o||ct(a.dtype,i.dtype);if((a.dtype==="string"||i.dtype==="string"||p.shouldExecuteOnCPU([a,i]))&&e!=null){let l=p.tensorMap.get(a.dataId).values,m=p.tensorMap.get(i.dataId).values,f=a.dtype==="string"?I.fromUint8ToStringArray(l):l,d=a.dtype==="string"?I.fromUint8ToStringArray(m):m,[h,g]=e(a.shape,i.shape,f,d,u);return p.makeTensorInfo(g,u,h)}let c=new Hu(r,a.shape,i.shape);return p.runWebGPUProgram(c,[a,i],u)}}
/** Namespace object exposing the host-side kernel implementations through getters (registered with Be). */
var 
FS={};Be(FS,{addImpl:()=>dS,bincountImpl:()=>lM,bincountReduceImpl:()=>mM,castImpl:()=>fS,ceilImpl:()=>hS,concatImpl:()=>fM,equalImpl:()=>gS,expImpl:()=>xS,expm1Impl:()=>yS,floorImpl:()=>bS,gatherNdImpl:()=>dM,gatherV2Impl:()=>hM,greaterEqualImpl:()=>IS,greaterImpl:()=>CS,lessEqualImpl:()=>SS,lessImpl:()=>wS,linSpaceImpl:()=>gM,logImpl:()=>vS,maxImpl:()=>xM,maximumImpl:()=>kS,minimumImpl:()=>TS,multiplyImpl:()=>Ql,negImpl:()=>yM,notEqualImpl:()=>NS,prodImpl:()=>bM,raggedGatherImpl:()=>IM,raggedTensorToTensorImpl:()=>vM,rangeImpl:()=>kM,rsqrtImpl:()=>ES,scatterImpl:()=>TM,sigmoidImpl:()=>NM,simpleAbsImpl:()=>pM,sliceImpl:()=>_M,sparseFillEmptyRowsImpl:()=>EM,sparseReshapeImpl:()=>$M,sparseSegmentReductionImpl:()=>RM,sqrtImpl:()=>AM,squaredDifferenceImpl:()=>$S,stridedSliceImpl:()=>FM,stringNGramsImpl:()=>DM,stringSplitImpl:()=>PM,stringToHashBucketFastImpl:()=>OM,subImpl:()=>AS,tileImpl:()=>MM,topKImpl:()=>BM,transposeImpl:()=>_S,uniqueImpl:()=>VM});
/** Asserts that none of the given tensors (a single tensor or an array; null entries are ignored) has dtype complex64; `e` names the op in the error message. */
function Xs(r,e){Array.isArray(r)||(r=[r]),r.forEach(t=>{t!=null&&x.assert(t.dtype!=="complex64",()=>`${e} does not support complex64 tensors in the CPU backend.`)})}
/** Returns a new Float32Array holding the absolute value of every element of `r`. */
function pM(r){let e=new Float32Array(r.length);for(let t=0;t<r.length;++t)e[t]=Math.abs(r[t]);return e}
/**
 * Wraps the scalar function `r` into a broadcasting element-wise implementation.
 * The returned function takes (aShape, bShape, aValues, bValues, dtype) and returns [resultValues, resultShape].
 * When no broadcasting dimensions are reported, values are paired by flat index (wrapping with modulo);
 * otherwise each output location is mapped back to the matching index of each input.
 */
function kt(r){return(e,t,o,n,s)=>{let a=I.assertAndGetBroadcastShape(e,t),i=a.length,p=x.computeStrides(a),u=x.sizeFromShape(a),c=x.getTypedArrayFromDType(s,u),l=e.length,m=t.length,f=x.computeStrides(e),d=x.computeStrides(t),h=I.getBroadcastDims(e,a),g=I.getBroadcastDims(t,a);if(h.length+g.length===0)for(let y=0;y<c.length;++y)c[y]=r(o[y%o.length],n[y%n.length]);else for(let y=0;y<c.length;++y){let b=x.indexToLoc(y,i,p),C=b.slice(-l);h.forEach(E=>C[E]=0);let w=x.locToIndex(C,l,f),k=b.slice(-m);g.forEach(E=>k[E]=0);let _=x.locToIndex(k,m,d);c[y]=r(o[w],n[_])}return[c,a]}}function 
Ec(r){let{inputs:e,backend:t}=r,{real:o,imag:n}=e,s=t.data.get(o.dataId).values,a=t.data.get(n.dataId).values,i=t.makeTensorInfo(o.shape,"complex64"),p=t.data.get(i.dataId);return p.complexTensorInfos={real:t.makeTensorInfo(o.shape,"float32",s),imag:t.makeTensorInfo(n.shape,"float32",a)},i}function Hg(r,e,t="float32"){if(t==="complex64"){let
${ue("index")} {
for (var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if (flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
${e.join(`
`)}
setOutputAtIndex(flatIndex, ${t});
}
}
}
`}};
/**
 * AddN kernel. A single input is returned through the identity kernel (Lt); otherwise the output
 * dtype is the fold of ct over all input dtypes and the Kg program is run on all inputs.
 */
function Wre(r){let{inputs:e,backend:t}=r,o=e;if(o.length===1)return Lt({inputs:{x:o[0]},backend:t});let n=o.map(i=>i.dtype).reduce((i,p)=>ct(i,p)),s=o.map(i=>i.shape),a=new Kg(s);return t.runWebGPUProgram(a,o,n)}var wL={kernelName:an,backendName:"webgpu",kernelFunc:Wre};
/**
 * WebGPU program description for the argMin / argMax shader.
 * Constructor arguments: e = input shape, t = axis to reduce, o = "min" selects the `<` comparison,
 * any other value selects `>`.
 * The "plain" variant (one invocation scans the whole reduced axis) is used when the reduced size is
 * below 32 or the output has more than 1000 elements; otherwise the "shared" variant lets a whole
 * work group cooperate on one output element through workgroup arrays.
 * The shader reads a uniform `infinityValue` as the initial best value in the shared variant.
 */
var Ac=class{constructor(e,t,o){this.workGroupSize=[64,1,1],this.variableNames=["x"],this.uniforms="infinityValue : f32,",this.size=!0;let n=[t];this.op=o==="min"?"<":">";let[s,a]=I.computeOutAndReduceShapes(e,n);this.outputShape=s.length===0?[1]:s,this.dispatchLayout=fe(this.outputShape),x.sizeFromShape(a)<32||x.sizeFromShape(s)>1e3?(this.type="plain",this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize)):(this.type="shared",this.dispatch=ae(this.dispatchLayout,this.outputShape,[1,1,1])),this.inputShape=e,this.shaderKey=`argMinMax_${this.op}_${this.type}`}
/* `e()` yields the WGSL expression for the length of the last input dimension; `t()` yields the leading getX arguments taken from outputCoords. */
getUserCode(){let e=()=>this.inputShape.length===1?"uniforms.xShape":`uniforms.xShape.${Yo(this.inputShape.length-1)}`,t=()=>{let o="";if(this.outputShape.length===1)this.inputShape.length!==1&&(o+="outputCoords,");else for(let n=0;n<this.outputShape.length;n++)o+=`outputCoords.${Yo(n)},`;return o};return this.type==="shared"?`
fn DIV_CEIL(a : u32, b : u32) -> u32 {
return ((a - 1u) / b + 1u);
}
${`
var<workgroup> xBestIndices : array<i32, ${this.workGroupSize[0]}>;
var<workgroup> xBestValues : array<f32, ${this.workGroupSize[0]}>;
`}
${ue("index")} {
let outputIndex = index / i32(workGroupSizeX);
let reduceLength = ${e()};
var bestIndex = i32(localId.x);
var bestValue = uniforms.infinityValue;
let outputCoords = getCoordsFromIndex(outputIndex);
for (var k = i32(localId.x); k < reduceLength && outputIndex < uniforms.size;
k = k + i32(workGroupSizeX)) {
let candidate = getX(${t()} k);
if (!isnan(candidate) && candidate ${this.op} bestValue) {
bestValue = candidate;
bestIndex = k;
}
}
xBestValues[localId.x] = bestValue;
xBestIndices[localId.x] = bestIndex;
workgroupBarrier();
var reduceSize = min(u32(reduceLength), workGroupSizeX);
for (var currentSize = reduceSize / 2u; reduceSize > 1u;
currentSize = reduceSize / 2u) {
let interval = DIV_CEIL(reduceSize, 2u);
if (localId.x < currentSize) {
let candidate = xBestValues[localId.x + interval];
if (candidate ${this.op} bestValue) {
bestValue = candidate;
xBestValues[localId.x] = bestValue;
xBestIndices[localId.x] = xBestIndices[localId.x + interval];
}
}
reduceSize = interval;
workgroupBarrier();
}
if (localId.x == 0u && outputIndex < uniforms.size) {
setOutputAtIndexI32(outputIndex, xBestIndices[localId.x]);
}
}
`:`
${ue("index")} {
if (index < uniforms.size) {
let outputCoords = getCoordsFromIndex(index);
var bestIndex = 0;
var bestValue = getX(${t()} 0);
let reduceLength = ${e()};
for (var i = 1; i < reduceLength; i++) {
let candidate = getX(${t()} i);
if (candidate ${this.op} bestValue) {
bestValue = candidate;
bestIndex = i;
}
}
setOutputAtIndexI32(index, bestIndex);
}
}
`}};var jg=class{constructor(e,t){this.variableNames=["A"],this.workGroupSize=[16,16,1];let o=new Array(e.length);for(let n=0;n<o.length;n++)o[n]=e[t[n]];this.outputShape=o,this.dispatchLayout={x:[0],y:[1]},this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[1,1,1]),this.shaderKey="transposeShared"}getUserCode(){return`
const TILE_DIM = ${this.workGroupSize[0]};
var<workgroup> tile : array<array<f32, ${this.workGroupSize[0]+1}>, ${this.workGroupSize[0]}>;
${Ri()}
fn _start(@builtin(local_invocation_id) localId : vec3<u32>,
@builtin(workgroup_id) workgroupId : vec3<u32>) {
var x = i32(workgroupId.x) * TILE_DIM + i32(localId.x);
var y = i32(workgroupId.y) * TILE_DIM + i32(localId.y);
let width = uniforms.outShape[0];
let height = uniforms.outShape[1];
if (x < width && y < height) {
tile[localId.y][localId.x] = A[y * width + x];
}
workgroupBarrier();
x = i32(workgroupId.y) * TILE_DIM + i32(localId.x);
y = i32(workgroupId.x) * TILE_DIM + i32(localId.y);
if (x < height && y < width) {
setOutputAtIndex((y * height + x), tile[localId.x]
[localId.y]);
}
}
`}};var Xg=class{constructor(e,t){this.variableNames=["A"],this.workPerThread=1,this.workGroupSize=[64,1,1],this.size=!0;let o=new Array(e.length);for(let n=0;n<o.length;n++)o[n]=e[t[n]];this.outputShape=o,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[this.workPerThread,1,1]),this.newDim=t,this.shaderKey=`transpose_${t}`}getUserCode(){let e=At(this.outputShape.length),t=Ure(this.newDim);return`
${ue("index")} {
for(var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if(flatIndex < uniforms.size) {
let resRC = getCoordsFromIndex(flatIndex);
setOutputAtIndex(flatIndex, A[getIndexFromCoords${this.outputShape.length}D(
${e}(${t}), uniforms.aShape)]);
}
}
}
`}};function Ure(r){let e=r.length;if(e>6)throw Error(`Transpose for rank ${e} is not yet supported`);let t=new Array(e);for(let o=0;o<r.length;o++)t[r[o]]=`resRC.${Yo(o)}`;return t.join()}function Nr(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{perm:s}=o,a=t,i=n.shape.length,p=new Array(i);for(let c=0;c<p.length;c++)p[c]=n.shape[s[c]];if(t.shouldExecuteOnCPU([n])){let l=a.tensorMap.get(n.dataId).values,m=bL(l,n.shape,n.dtype,s,p);return t.makeTensorInfo(p,n.dtype,m)}if(n.shape.length===2&&x.arraysEqual(s,[1,0])){let c=new jg(n.shape,s);return a.runWebGPUProgram(c,[n],n.dtype)}let u=new Xg(n.shape,s);return a.runWebGPUProgram(u,[n],n.dtype)}var SL={kernelName:Mr,backendName:"webgpu",kernelFunc:Nr};function Gre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=x.parseAxisParam(s,n.shape),i=I.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=Nr({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=I.getInnerMostAxes(a.length,p.shape.length)),I.assertAxesAreInnerMostDims("argMax",[a[0]],p.shape.length);let c=new Ac(p.shape,a[0],"max"),l=[{type:"float32",data:[Number.NEGATIVE_INFINITY]}],m=t.runWebGPUProgram(c,[p],"int32",l);return u.forEach(f=>t.disposeData(f.dataId)),m}var vL={kernelName:un,backendName:"webgpu",kernelFunc:Gre};function Hre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=x.parseAxisParam(s,n.shape),i=I.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=Nr({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=I.getInnerMostAxes(a.length,p.shape.length)),I.assertAxesAreInnerMostDims("argMin",[a[0]],p.shape.length);let c=new Ac(p.shape,a[0],"min"),l=[{type:"float32",data:[Number.POSITIVE_INFINITY]}],m=t.runWebGPUProgram(c,[p],"int32",l);return u.forEach(f=>t.disposeData(f.dataId)),m}var kL={kernelName:ja,backendName:"webgpu",kernelFunc:Hre};var qre=it({opType:ye.ATAN2}),TL={kernelName:sa,backendName:"webgpu",kernelFunc:qre};var Jl=class{constructor(e,t){this.variableNames=["x"],this.uniforms="stride : vec2<i32>, pad : 
vec2<i32>, dilation : vec2<i32>, convDims : vec2<i32>, filterDims : vec2<i32>,",this.workGroupSize=[128,1,1],this.size=!0,this.outputShape=e.outShape,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey=`pool2D_${t}`,this.poolType=t}getUserCode(){let e="resultValue = max(value, resultValue);";this.poolType==="avg"&&(e="resultValue = resultValue + value; count = count + 1.0;");let t="resultValue";return this.poolType==="avg"&&(t="resultValue / count"),`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let xRCCorner = vec2<i32>(coords.yz) * uniforms.stride - uniforms.pad;
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
var resultValue = ${this.poolType==="avg"?"0.0":"-1.0 / pow(10.0, -20.0)"};
var count = 0.0;
for (var wR = 0; wR < uniforms.filterDims.x; wR = wR + uniforms.dilation.x) {
let xR = xRCorner + wR;
if (xR < 0 || xR >= uniforms.convDims.x) {
continue;
}
for (var wC = 0; wC < uniforms.filterDims.y; wC = wC + uniforms.dilation.y) {
let xC = xCCorner + wC;
if (xC < 0 || xC >= uniforms.convDims.y) {
continue;
}
let value = getX(batch, xR, xC, coords[3]);
${e}
}
}
setOutputAtIndex(index, ${t});
}
}
`}};var Yg=class{constructor(e){this.variableNames=["x"],this.uniforms="stride : vec2<i32>,",this.workGroupSize=[256,1,1],this.size=!0,this.outputShape=e.outShape,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey="poolWithFilterSizeEqualsOne"}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d = coords[3];
let xRCCorner = coords.yz * uniforms.stride;
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
let value = getX(batch, xRCorner, xCCorner, d);
setOutputAtIndex(index, value);
}
}
`}};
/**
 * Shader program description for a reduction over the second dimension of a
 * [batchSize, inSize] input. The dispatch is computed with a workgroup size
 * of [1,1,1] while `workGroupSize` is [64,1,1], and the shader divides the
 * global index by `workGroupSizeX` to obtain the output index.
 */
var Qg=class{
/**
 * @param e Reduce info; `batchSize` and `inSize` are read to form the input shape.
 * @param t Reduce type. `getUserCode` recognises "min", "max", "sum", "mean"
 *     and "prod"; it is also embedded in the shader key.
 */
constructor(e,t){this.workGroupSize=[64,1,1],this.variableNames=["x"],this.uniforms="reduceSize : i32,",this.size=!0,this.inputShape=[e.batchSize,e.inSize];let[o]=I.computeOutAndReduceShapes(this.inputShape,[1]);this.outputShape=o.length===0?[1]:o,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,[1,1,1]),this.reduceType=t,this.shaderKey=`reduce_${t}`}
/**
 * Returns the WGSL source. Three snippets are prepared for interpolation by
 * the shader text that follows (their names must not change):
 *   e - statement folding `candidate` into `bestValue`; the min/max variant
 *       writes `uniforms.NAN` once a NaN candidate is seen;
 *   t - initial `bestValue`: "0.0" by default, "f32(x[offset])" for min/max,
 *       "1.0" for prod;
 *   o - final output statement; "mean" divides by `uniforms.reduceSize`.
 * For any other reduce type `e` stays empty.
 */
getUserCode(){let e="",t="0.0";this.reduceType==="min"||this.reduceType==="max"?(e=`
if (isnan(candidate)) {
bestValue = uniforms.NAN;
} else if (!isnan(bestValue) && candidate ${this.reduceType==="min"?"<":">"} bestValue)
{ bestValue = candidate; }`,t="f32(x[offset])"):this.reduceType==="sum"||this.reduceType==="mean"?e=" bestValue = bestValue + candidate; ":this.reduceType==="prod"&&(e=" bestValue = bestValue * candidate; ",t="1.0");let o=this.reduceType==="mean"?"setOutputAtIndex(outputIndex, bestValue / f32(uniforms.reduceSize));":"setOutputAtIndex(outputIndex, bestValue);";return`
fn DIV_CEIL(a : u32, b : u32) -> u32 {
return ((a - 1u) / b + 1u);
}
${`
var<workgroup> xBestValues : array<f32, ${this.workGroupSize[0]}>;
`}
fn getOffset(outputIndex : i32) -> i32 {
let outputCoords = getCoordsFromIndex(outputIndex);
let offset = ${this.outputShape.length===1?"outputCoords":"outputCoords[0]"} * uniforms.reduceSize;
return offset;
}
${ue("index")} {
let outputIndex = index / i32(workGroupSizeX);
let offset = getOffset(outputIndex);
var bestValue = ${t};
let Length = uniforms.reduceSize;
let WorkPerThread = DIV_CEIL(u32(Length), workGroupSizeX);
for (var k = i32(localId.x); k < Length && outputIndex < uniforms.size;
k = k + i32(workGroupSizeX)) {
let candidate = f32(x[offset + k]);
${e}
}
xBestValues[localId.x] = bestValue;
workgroupBarrier();
var reduceSize = min(u32(Length), workGroupSizeX);
for (var currentSize = reduceSize / 2u; reduceSize > 1u;
currentSize = reduceSize / 2u) {
let interval = DIV_CEIL(reduceSize, 2u);
if (localId.x < currentSize) {
let candidate = xBestValues[localId.x + interval];
${e}
xBestValues[localId.x] = bestValue;
}
reduceSize = interval;
workgroupBarrier();
}
if (localId.x == 0u && outputIndex < uniforms.size) {
${o}
}
}
`}};function Ys(r,e,t,o,n){let s=r.shape.length,a=[],i=x.parseAxisParam(e,r.shape),p=i,u=I.getAxesPermutation(p,s),c=r;u!=null&&(c=Nr({inputs:{x:r},attrs:{perm:u},backend:n}),p=I.getInnerMostAxes(p.length,s),a.push(c)),I.assertAxesAreInnerMostDims(o,p,s);let[l,m]=I.computeOutAndReduceShapes(c.shape,p),f=l;t&&(f=I.expandShapeToKeepDim(l,i));let d;if((o==="max"||o==="prod")&&n.shouldExecuteOnCPU([c])){let h=n.tensorMap.get(c.dataId).values;switch(o){case"max":let g=rL(h,x.sizeFromShape(m),f,r.dtype);d=n.makeTensorInfo(f,r.dtype,g);break;case"prod":let{outVals:y,outShape:b,outDtype:C}=uL(c.shape,c.dtype,h,p);d=n.makeTensorInfo(b,C,y);break;default:throw new Error(`${o} CPU implementation is not yet supported.`)}}else{let h=x.sizeFromShape(m),y=x.sizeFromShape(c.shape)/h,b={windowSize:h,inSize:h,batchSize:y,outSize:1},C=o==="mean"?"float32":Ca(r.dtype),w=[{type:"int32",data:[h]}],k=new Qg(b,o),_=n.runWebGPUProgram(k,[c],C,w);a.push(_),d=xe({inputs:{x:_},attrs:{shape:f},backend:n})}return a.forEach(h=>n.disposeData(h.dataId)),d}function em(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{reductionIndices:s,keepDims:a}=o;return Ys(n,s,a,"max",t)}var NL={kernelName:$n,backendName:"webgpu",kernelFunc:em};function DS(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{keepDims:s,axis:a}=o;return Ys(n,a,s,"mean",t)}var _L={kernelName:An,backendName:"webgpu",kernelFunc:DS};function Zg(r,e,t,o){if(e.filterWidth===1&&e.filterHeight===1&&x.arraysEqual(e.inShape,e.outShape))return Lt({inputs:{x:r},backend:o});if(e.filterWidth===e.inWidth&&e.filterHeight===e.inHeight&&e.batchSize===1&&e.padInfo.type==="VALID"){let a=r.shape.length,i=xe({inputs:{x:r},backend:o,attrs:{shape:[r.shape[a-3]*r.shape[a-2],r.shape[a-1]]}}),p;t==="avg"?p=DS({inputs:{x:i},backend:o,attrs:{axis:0,keepDims:!1}}):(x.assert(t==="max",()=>`Invalid pool type ${t}`),p=em({inputs:{x:i},backend:o,attrs:{reductionIndices:0,keepDims:!1}}));let u=xe({inputs:{x:p},backend:o,attrs:{shape:e.outShape}});return 
o.disposeData(i.dataId),o.disposeData(p.dataId),u}let n,s=[{type:"int32",data:[e.strideHeight,e.strideWidth]}];return e.filterHeight===1&&e.filterWidth===1?n=new Yg(e):(t==="avg"?n=new Jl(e,"avg"):(x.assert(t==="max",()=>`Invalid pool type ${t}`),n=new Jl(e,"max")),s.push({type:"int32",data:[e.padInfo.top,e.padInfo.left]},{type:"int32",data:[e.dilationHeight,e.dilationWidth]},{type:"int32",data:[e.inHeight,e.inWidth]},{type:"int32",data:[e.effectiveFilterHeight,e.effectiveFilterWidth]})),o.runWebGPUProgram(n,[r],r.dtype,s)}function Kre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1,c=I.computePool2DInfo(n.shape,s,a,u,i,p);return Zg(n,c,"avg",t)}var EL={kernelName:pn,backendName:"webgpu",kernelFunc:Kre};function jre(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s}=e,{transposeA:a,transposeB:i}=o;return _c({a:n,b:s,transposeA:a,transposeB:i,backend:t})}var $L={kernelName:cn,backendName:"webgpu",kernelFunc:jre};var Jg=class{constructor(e,t){this.variableNames=["source"],this.workPerThread=1,this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.rank=t.length,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[this.workPerThread,1,1]),this.start=e,this.uniforms=`start : ${At(e.length)}, `,this.shaderKey="slice"}getUserCode(){let e=At(this.rank),t=Xre(this.rank),o;return this.start.length===1?o=this.outputShape.map((s,a)=>"sourceLoc = uniforms.start + coords;"):o=this.outputShape.map((s,a)=>`sourceLoc.${PS[a]} = uniforms.start.${Yo(a)} + coords.${PS[a]};`),`
${ue("index")} {
if (index < uniforms.size) {
var sourceLoc : ${e};
let coords = getCoordsFromIndex(index);
${o.join(`
`)}
setOutputAtIndex(index, getSource(${t}));
}
}
`}},PS=["x","y","z","w","u","v"];function Xre(r){if(r===1)return"sourceLoc";if(r<=6)return PS.slice(0,r).map(e=>`sourceLoc.${e}`).join(",");throw Error(`Slicing for rank ${r} is not yet supported`)}function ms(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,size:a}=o,[i,p]=et.parseSliceParams(n,s,a);if(et.assertParamsValid(n,i,p),t.shouldExecuteOnCPU([n])||n.dtype==="string"){let l=t.tensorMap.get(n.dataId),m=fL(l.values,i,p,n.shape,n.dtype);return t.makeTensorInfo(p,n.dtype,m)}if(x.sizeFromShape(p)===0)return t.makeTensorInfo(p,n.dtype,[]);let u=new Jg(i,p),c=[{type:"int32",data:i}];return t.runWebGPUProgram(u,[n],n.dtype,c)}var RL={kernelName:qn,backendName:"webgpu",kernelFunc:ms};var Yre=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockShape:s,crops:a}=o;x.assert(n.shape.length<=4,()=>"batchToSpaceND for rank > 4 with a WebGPU backend not implemented yet");let i=s.reduce((b,C)=>b*C),p=I.getReshaped(n.shape,s,i),u=I.getPermuted(p.length,s.length),c=I.getReshapedPermuted(n.shape,s,i),l=I.getSliceBeginCoords(a,s.length),m=I.getSliceSize(c,a,s.length),f=[],d=xe({inputs:{x:n},backend:t,attrs:{shape:p}}),h=Nr({inputs:{x:d},backend:t,attrs:{perm:u}}),g=xe({inputs:{x:h},backend:t,attrs:{shape:c}}),y=ms({inputs:{x:g},backend:t,attrs:{begin:l,size:m}});return f.push(d),f.push(h),f.push(g),f.forEach(b=>t.disposeData(b.dataId)),y},AL={kernelName:hs,backendName:"webgpu",kernelFunc:Yre};var OS=it({opType:ye.NOT_EQUAL,dtype:"bool",cpuKernelImpl:iL}),FL={kernelName:go,backendName:"webgpu",kernelFunc:OS};function Wa(r){let{inputs:e,backend:t}=r,{input:o}=e,n=t.tensorMap.get(o.dataId);return Lt({inputs:{x:n.complexTensorInfos.real},backend:t})}var DL={kernelName:la,backendName:"webgpu",kernelFunc:Wa};function PL(r,e){let t=new Zo(r.shape,pe.TO_INT),o=e.runWebGPUProgram(t,[r],"int32");return{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}function MS(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{dtype:s}=o;if(s==="complex64"){if(n.dtype==="complex64")return 
Lt({inputs:{x:n},backend:t});let a=Wr(n.shape),i=MS({inputs:{x:n},backend:t,attrs:{dtype:"float32"}}),p=ls({inputs:{real:i,imag:a},backend:t});return a.dispose(),t.disposeData(i.dataId),p}if(n.dtype==="complex64"){let a=Wa({inputs:{input:n},backend:t}),i=MS({inputs:{x:a},backend:t,attrs:{dtype:s}});return t.disposeData(a.dataId),i}if(!x.hasEncodingLoss(n.dtype,s)){let a=Lt({inputs:{x:n},backend:t});return{dataId:a.dataId,shape:a.shape,dtype:s}}if(t.shouldExecuteOnCPU([n])){let a=t.tensorMap.get(n.dataId).values,[i,p,u]=WM(a,n.shape,n.dtype,s);return t.makeTensorInfo(i,p,u)}if(s==="int32")return PL(n,t);if(s==="bool"){let a=t.makeTensorInfo([],"bool",x.getTypedArrayFromDType("bool",1)),p=OS({inputs:{a:n,b:a},backend:t});return t.disposeData(a.dataId),p}throw new Error(`Error in Cast: failed to cast ${n.dtype} to ${s}`)}var OL={kernelName:to,backendName:"webgpu",kernelFunc:MS};var Qre=Ge({opType:pe.CEIL,cpuKernelImpl:UM}),ML={kernelName:ro,backendName:"webgpu",kernelFunc:Qre};var ex=class{constructor(e){this.variableNames=["A"],this.uniforms="minVal : f32, maxVal : f32,",this.workPerThread=4,this.workGroupSize=[64,1,1],this.isVec4=!0,this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[this.workPerThread,1,1]),this.shaderKey="clipVec4"}getUserCode(){return`
${ue("index")} {
if(index < uniforms.size) {
let value = getAByOutputIndex(index);
var clampedValue : vec4<f32>;
for (var i = 0; i < 4; i = i + 1) {
if (isnan(value[i])) {
clampedValue[i] = value[i];
} else {
clampedValue[i] = clamp(value[i], uniforms.minVal, uniforms.maxVal);
}
}
setOutputAtIndex(index, clampedValue);
}
}
`}};
/**
 * Shader program description keyed "clip": the scalar variant of clip. The
 * shader that follows clamps each value to [minVal, maxVal] and passes NaN
 * through unchanged.
 */
var tx = class {
  /** @param e Output shape. */
  constructor(e) {
    this.variableNames = ["A"];
    this.uniforms = "minVal : f32, maxVal : f32,";
    this.workGroupSize = [64, 1, 1];
    this.size = true;
    this.outputShape = e;
    this.dispatchLayout = fe(this.outputShape);
    this.dispatch = ae(this.dispatchLayout, this.outputShape, this.workGroupSize);
    this.shaderKey = "clip";
  }

  /** Returns the WGSL source of the scalar clip. */
  getUserCode() {
    return `
${ue("index")} {
if(index < uniforms.size) {
let value = getAByOutputIndex(index);
if (isnan(value)) {
setOutputAtIndex(index, value);
return;
}
setOutputAtIndex(index, clamp(value, uniforms.minVal, uniforms.maxVal));
}
}
`}};function Zre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{clipValueMin:s,clipValueMax:a}=o,i,p=[{type:"float32",data:[s]},{type:"float32",data:[a]}];return x.sizeFromShape(n.shape)%4===0?i=new ex(n.shape):i=new tx(n.shape),t.runWebGPUProgram(i,[n],n.dtype,p)}var LL={kernelName:Ro,backendName:"webgpu",kernelFunc:Zre};var rx=class{constructor(e){this.uniforms="",this.workPerThread=1,this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=I.computeOutShape(e,1),this.variableNames=e.map((t,o)=>`T${o}`),this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[this.workPerThread,1,1]),this.offsetLength=e.length-1;for(let t=0;t<this.offsetLength;t++)this.uniforms+=`offset${t} : i32,`;this.shaderKey="concat"}getUserCode(){let e=[];if(this.offsetLength>0){e.push("if (yC < uniforms.offset0){ setOutputAtCoords(coords.x, coords.y, getT0(yR, yC)); }");for(let s=1;s<this.offsetLength;s++)e.push(`else if (yC < uniforms.offset${[s]}){ setOutputAtCoords(coords.x, coords.y, getT${s}(yR, yC - uniforms.offset${s-1})); }`);let o=this.offsetLength,n=this.offsetLength-1;e.push(`else { setOutputAtCoords(coords.x, coords.y, getT${o}(yR, yC - uniforms.offset${n})); }`)}else e.push("setOutputAtCoords(coords.x, coords.y, getT0(yR, yC));");return`
${ue("index")} {
for(var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if(flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
let yR = coords.x;
let yC = coords.y;
${e.join(`
`)}
}
}
}
`}};
/** Kernel function returning (through `Lt`) the `imag` component stored in the input's `complexTensorInfos`. */
function qu(r){let{inputs:e,backend:t}=r,{input:o}=e,n=t.tensorMap.get(o.dataId);return Lt({inputs:{x:n.complexTensorInfos.imag},backend:t})}
/** Kernel registration record binding `qu` to the "webgpu" backend. */
var BL={kernelName:Ya,backendName:"webgpu",kernelFunc:qu};
/**
 * Concatenation implementation.
 *
 * @param r List of tensor infos; the dtype of `r[0]` decides the path.
 * @param e Axis to concatenate along.
 * @param t Backend.
 * @returns Tensor info of the concatenated tensor.
 *
 * Paths, in order:
 *  1. "complex64": real and imaginary parts are concatenated separately by
 *     recursion and recombined with `ls`; all temporaries are disposed.
 *  2. CPU (always taken for "string" dtype, otherwise when
 *     `shouldExecuteOnCPU` says so): each input is reshaped to
 *     [-1, size of dims from `e` on], read back with `readSync` and
 *     concatenated by `GM`.
 *  3. More inputs than `maxStorageBuffersPerShaderStage - 1`: inputs are
 *     concatenated in chunks of that size, then the chunk results are
 *     concatenated recursively and disposed.
 *  4. Otherwise: inputs are reshaped to 2-D by `Jre` and the `rx` program is
 *     run with the running sum of the input widths (all inputs but the last)
 *     as int32 uniforms; the 2-D result is reshaped to the final shape.
 */
function Fc(r,e,t){let o=r[0].dtype;if(o==="complex64"){let d=r.map(C=>Wa({inputs:{input:C},backend:t})),h=r.map(C=>qu({inputs:{input:C},backend:t})),g=Fc(d,e,t),y=Fc(h,e,t),b=ls({inputs:{real:g,imag:y},backend:t});return d.forEach(C=>t.disposeData(C.dataId)),h.forEach(C=>t.disposeData(C.dataId)),t.disposeData(g.dataId),t.disposeData(y.dataId),b}let n=t.shouldExecuteOnCPU(r);if(o==="string"&&(n=!0),n){let d=r.map(k=>{let E=[-1,x.sizeFromShape(k.shape.slice(e))];return xe({inputs:{x:k},backend:t,attrs:{shape:E}})}),h=d.map(k=>({vals:t.readSync(k.dataId),shape:k.shape})),g=I.computeOutShape(d.map(k=>k.shape),1),y=d[0].shape[0]===1,b=GM(h,g,o,y),C=I.computeOutShape(r.map(k=>k.shape),e),w=t.makeTensorInfo(C,o,b);return d.forEach(k=>t.disposeData(k.dataId)),w}let s=t.device.limits.maxStorageBuffersPerShaderStage-1;if(r.length>s){let d=[];for(let g=0;g<r.length;g+=s){let y=r.slice(g,g+s);d.push(Fc(y,e,t))}let h=Fc(d,e,t);for(let g of d)t.disposeData(g.dataId);return h}let{tensors2D:a,outShape:i}=Jre(r,e,t),p=a.map(d=>d.shape),u=new rx(p),c=[],l=new Array(p.length-1);if(l.length>0){l[0]=p[0][1],c.push({type:"int32",data:[l[0]]});for(let d=1;d<l.length;d++)l[d]=l[d-1]+p[d][1],c.push({type:"int32",data:[l[d]]})}let m=t.runWebGPUProgram(u,a,a[0].dtype,c);a.forEach(d=>t.disposeData(d.dataId));let f=xe({inputs:{x:m},backend:t,attrs:{shape:i}});return t.disposeData(m.dataId),f}
/**
 * Reshapes every input to 2-D as [product of dims before axis `e`, product
 * of dims from axis `e` on] and computes the final concatenated shape.
 * @param r List of tensor infos.
 * @param e Concatenation axis.
 * @param t Backend.
 * @returns `{tensors2D, outShape}`.
 */
function Jre(r,e,t){let o=I.computeOutShape(r.map(s=>s.shape),e);return{tensors2D:r.map(s=>xe({inputs:{x:s},backend:t,attrs:{shape:[x.sizeFromShape(s.shape.slice(0,e)),x.sizeFromShape(s.shape.slice(e))]}})),outShape:o}}
/**
 * Concat kernel function.
 * Here `inputs` is itself the list of tensors. After checking the shapes with
 * `I.assertParamsConsistent`, it returns an empty tensor when the output has
 * zero elements, forwards a single non-empty input through `Lt`, and calls
 * `Fc` on the non-empty inputs otherwise.
 */
function LS(r){let{inputs:e,backend:t,attrs:o}=r,{axis:n}=o,s=x.parseAxisParam(n,e[0].shape)[0],a=e.map(u=>u.shape);I.assertParamsConsistent(a,s);let i=I.computeOutShape(e.map(u=>u.shape),s);if(x.sizeFromShape(i)===0)return t.makeTensorInfo(i,e[0].dtype,[]);let p=e.filter(u=>x.sizeFromShape(u.shape)>0);return p.length===1?Lt({inputs:{x:p[0]},backend:t}):Fc(p,s,t)}
/** Kernel registration record binding `LS` to the "webgpu" backend. */
var VL={kernelName:gs,backendName:"webgpu",kernelFunc:LS};
/**
 * Generates the WGSL helper functions `mm_readA`, `mm_readB` and `mm_write`
 * for the matmul-based conv2d program (the function body continues on the
 * lines below).
 *
 * @param r True for channels-last layout; selects which coordinate order and
 *     shape components are used.
 * @param e, t, o Flags that allow the bounds checks in the read functions to
 *     be omitted (see the `C` snippet below).
 * @param n, s, a Passed to `Kr` / `ur` for the bias and activation code.
 * @param i, p, u Element sizes (1, 3 or 4) of the x reads, the W reads and
 *     the written value; an unsupported size makes the snippet builders
 *     `c` / `l` throw.
 */
function eoe(r,e,t,o,n=!1,s=null,a=!1,i=4,p=4,u=4){let c=A=>{switch(A){case 1:return"resData = x[xIndex];";case 3:return"resData = vec3<f32>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);";case 4:return"resData = x[xIndex / 4];";default:throw new Error(`innerElementSize ${A} is not supported.`)}},l=A=>{switch(A){case 1:return"return W[row * uniforms.wShape[3] + colIn];";case 4:return"return W[row * uniforms.wShape[3] / 4 + colIn];";default:throw new Error(`innerElementSize ${A} is not supported.`)}},m=r?`
let coord = vec4<i32>(batch, xRow, xCol, xCh);
`:`
let coord = vec4<i32>(batch, xCh, xRow, xCol);
`,f=r?`
let coords = vec4<i32>(
batch,
row / outWidth,
row % outWidth,
col);
`:`
let coords = vec4<i32>(
batch,
row,
col / outWidth,
col % outWidth);
`,d=r?"uniforms.xShape[1]":"uniforms.xShape[2]",h=r?"uniforms.xShape[2]":"uniforms.xShape[3]",g=r?"row":"col",y=r?"col":"row",b=`
let inChannels = uniforms.wShape[2];
let outWidth = ${r?"uniforms.outShape[2]":"uniforms.outShape[3]"};
let outRow = ${g} / outWidth;
let outCol = ${g} % outWidth;
let WRow = ${y} / (uniforms.filterDims[1] * inChannels);
let WCol = ${y} / inChannels % uniforms.filterDims[1];
let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * WRow - uniforms.pad[0];
let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * WCol - uniforms.pad[1];
let xCh = ${y} % inChannels;
var resData = ${vt(i)}(0.0);
// The bounds checking is always needed since we use it to pad zero for
// the 'same' padding type.
if (xRow >= 0 && xRow < ${d} && xCol >= 0 && xCol < ${h}) {
${m}
let xIndex = getIndexFromCoords4D(coord, uniforms.xShape);
${c(i)}
}
return resData;`,C=r?e&&o?`
let col = colIn * ${i};
${b}`:`
let col = colIn * ${i};
if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
${b}
}
return ${vt(i)}(0.0);`:o&&t?`
let col = colIn * ${i};
${b}`:`
let col = colIn * ${i};
if (row < uniforms.dimInner && col < uniforms.dimBOuter) {
${b}
}
return ${vt(i)}(0.0);`,w=`${l(p)}`,k=vt(u),_=r?vt(i):vt(p),E=r?vt(p):vt(i);return`
${ur(s,a,u===4,4)}
fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${_} {
${r?C:w}
}
fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${E} {
${r?w:C}
}
fn mm_write(batch: i32, row : i32, colIn : i32, valueIn : ${k}) {
let col = colIn * ${u};
if (row < uniforms.dimAOuter && col < uniforms.dimBOuter)
{
var value = valueIn;
let outWidth = ${r?"uniforms.outShape[2]":"uniforms.outShape[3]"};
${f}
${Kr(n,s)}
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}`}var ox=class{constructor(e,t,o,n,s=!1,a=null,i=!1,p=!1){this.variableNames=["x","W"],this.uniforms="filterDims : vec2<i32>, pad : vec2<i32>, stride : vec2<i32>, dilation : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=e.outShape,this.isChannelsLast=e.dataFormat==="channelsLast",this.isVec4=((e.inChannels%4===0||e.inChannels%3===0)&&this.isChannelsLast||e.outWidth%4===0&&!this.isChannelsLast)&&e.outChannels%4===0,this.dispatchLayout=this.isChannelsLast?{x:[3],y:[1,2],z:[0]}:{x:[2,3],y:[1],z:[0]},this.workGroupSize=Hl(this.dispatchLayout,this.outputShape,this.isVec4),this.elementsPerThread=ql(this.dispatchLayout,this.outputShape,this.isVec4),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,this.elementsPerThread),this.isVec4?(this.isChannelsLast&&e.inChannels%4!==0?(this.innerElementSize=3,this.variableTypes=["f32","vec4<f32>"]):(this.innerElementSize=4,this.variableTypes=["vec4<f32>","vec4<f32>"]),s&&(this.variableNames.push("bias"),this.variableTypes.push("vec4<f32>")),i&&(this.variableNames.push("preluActivationWeights"),this.variableTypes.push("vec4<f32>"))):(this.innerElementSize=this.elementsPerThread[0],s&&this.variableNames.push("bias"),i&&this.variableNames.push("preluActivationWeights")),this.sequentialAccessByThreads=p,this.addBias=s,this.activation=a,this.hasPreluActivationWeights=i,this.tileAOuter=this.workGroupSize[1]*this.elementsPerThread[1],this.tileBOuter=this.workGroupSize[0]*this.elementsPerThread[0],this.tileInner=Math.max(this.workGroupSize[0]*this.innerElementSize,this.workGroupSize[1]),this.fitAOuter=t%this.tileAOuter===0,this.fitBOuter=o%this.tileBOuter===0,this.fitInner=n%this.tileInner===0,this.shaderKey=`conv2DMM_${this.elementsPerThread}_${this.activation}}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.innerElementSize}_${this.isChannelsLast}_${this.sequentialAccessByThreads}`}getUserCode(){let 
e=this.isVec4?Uu(this.elementsPerThread,this.workGroupSize,!this.isChannelsLast,this.tileInner):Gu(this.elementsPerThread,this.workGroupSize,!this.isChannelsLast,this.tileInner,!1,null,this.sequentialAccessByThreads),t=this.isVec4?[this.innerElementSize,4,4]:[1,1,1];return`
${eoe(this.isChannelsLast,this.fitAOuter,this.fitBOuter,this.fitInner,this.addBias,this.activation,this.hasPreluActivationWeights,t[0],t[1],t[2])}
${e}
`}};var nx=class{constructor(e,t=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms="filterDims: vec2<i32>, pad: vec2<i32>, stride: vec2<i32>, dilation: vec2<i32>,",this.workGroupSize=[4,4,8],this.outputShape=e.outShape,this.isChannelsLast=e.dataFormat==="channelsLast",this.dispatchLayout=this.isChannelsLast?{x:[2],y:[1],z:[0,3]}:{x:[3],y:[2],z:[0,1]},this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.addBias=t,this.activation=o,this.hasPreluActivationWeights=n,t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.shaderKey=`conv2dnaive_${this.activation}_${this.isChannelsLast}`}getUserCode(){return`
${ur(this.activation,this.hasPreluActivationWeights,!1,4)}
fn readInp(batch : i32, row : i32, col : i32, chan : i32) -> f32{
let coords = vec4<i32>(batch, row, col, chan);
if (coordsInBounds4D(coords, uniforms.xShape)) {
return getX(batch, row, col, chan);
} else {
return 0.0;
}
}
fn readFilt(row : i32, col : i32, xChannel : i32, outChannel : i32) -> f32{
let coords = vec4<i32>(row, col, xChannel, outChannel);
if(coordsInBounds4D(coords, uniforms.wShape)) {
return getW(row, col, xChannel, outChannel);
} else {
return 0.0;
}
}
fn writeResult(batch : i32, row : i32, col : i32, chan : i32, valueIn : f32) {
let coords = ${this.isChannelsLast?"vec4<i32>(batch, row, col, chan);":"vec4<i32>(batch, chan, row, col);"}
if (coordsInBounds4D(coords, uniforms.outShape)) {
var value = valueIn;
${Kr(this.addBias,this.activation)}
setOutputAtCoords(coords.x, coords.y, coords.z, coords.w, value);
}
}
${ue("index")} {
let coords = getOutputCoords();
let batch = coords[0];
let outChannel = ${this.isChannelsLast?"coords[3];":"coords[1];"}
let outRow = ${this.isChannelsLast?"coords[1];":"coords[2];"}
let outCol = ${this.isChannelsLast?"coords[2];":"coords[3];"}
var acc : f32 = 0.0;
for (var row = 0; row < uniforms.filterDims[0]; row = row + 1) {
for (var col = 0; col < uniforms.filterDims[1]; col = col + 1) {
let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * row - uniforms.pad[0];
let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * col - uniforms.pad[1];
for (var xChannel = 0; xChannel < ${this.isChannelsLast?"uniforms.xShape[3];":"uniforms.xShape[1];"} xChannel = xChannel + 1) {
${this.isChannelsLast?"let v = readInp(batch, xRow, xCol, xChannel);":"let v = readInp(batch, xChannel, xRow, xCol);"}
let f = readFilt(row, col, xChannel, outChannel);
acc = acc + v * f;
}
}
}
writeResult(batch, outRow, outCol, outChannel, acc);
}
`}};function zL(r,e){let t=r.length;return t>=3?e?[...r.slice(0,-3),r[t-3]*r[t-2],r[t-1]]:[...r.slice(0,-3),r[t-3],r[t-2]*r[t-1]]:!e&&t===1&&r[0]>1?[r[0],1]:null}function toe({x:r,filter:e,convInfo:t,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=t.dataFormat==="channelsLast",u=!p,c=!1,l=p&&t.filterHeight===t.inHeight&&t.filterWidth===t.inWidth&&t.padInfo.type==="VALID",m=[],f,d;if(l){let y=t.inHeight*t.inWidth*t.inChannels;f=xe({inputs:{x:r},backend:o,attrs:{shape:[1,t.batchSize,y]}}),d=xe({inputs:{x:e},backend:o,attrs:{shape:[1,y,t.outChannels]}})}else f=xe({inputs:{x:r},backend:o,attrs:{shape:p?[t.batchSize,t.inHeight*t.inWidth,t.inChannels]:[t.batchSize,t.inChannels,t.inHeight*t.inWidth]}}),d=xe({inputs:{x:e},backend:o,attrs:{shape:[1,t.inChannels,t.outChannels]}});if(m.push(f),m.push(d),s!=null){let y=zL(s.shape,p);y!=null&&(s=xe({inputs:{x:s},backend:o,attrs:{shape:y}}),m.push(s))}if(n!=null){let y=zL(n.shape,p);y!=null&&(n=xe({inputs:{x:n},backend:o,attrs:{shape:y}}),m.push(n))}let h=_c({a:p?f:d,b:p?d:f,transposeA:u,transposeB:c,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a}),g=xe({inputs:{x:h},backend:o,attrs:{shape:t.outShape}});m.push(h);for(let y of m)o.disposeData(y.dataId);return g}function sx({x:r,filter:e,convInfo:t,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=n!=null,u=s!=null,c=t.dataFormat==="channelsLast",l=c&&t.filterHeight===t.inHeight&&t.filterWidth===t.inWidth&&t.padInfo.type==="VALID",m=P().getBool("WEBGPU_USE_NAIVE_CONV2D_DEBUG");if(!m&&(l||t.filterHeight===1&&t.filterWidth===1&&t.dilationHeight===1&&t.dilationWidth===1&&t.strideHeight===1&&t.strideWidth===1&&(t.padInfo.type==="SAME"||t.padInfo.type==="VALID")))return toe({x:r,filter:e,convInfo:t,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a});let 
f,d=[t.padInfo.top,t.padInfo.left],h=[{type:"int32",data:[t.filterHeight,t.filterWidth]},{type:"int32",data:[...d]},{type:"int32",data:[t.strideHeight,t.strideWidth]},{type:"int32",data:[t.dilationHeight,t.dilationWidth]}];if(m)f=new nx(t,p,i,u);else{let C=c?t.outHeight*t.outWidth:t.outChannels,w=c?t.outChannels:t.outHeight*t.outWidth,k=t.filterHeight*t.filterWidth*t.inChannels;h.push({type:"int32",data:[C]},{type:"int32",data:[w]},{type:"int32",data:[k]});let _=o.adapterInfo.isIntel();f=new ox(t,C,w,k,p,i,u,_)}let g=[],y=[r,e];p&&(!c&&n.shape.length===1&&(n=xe({inputs:{x:n},backend:o,attrs:{shape:[n.shape[0],1,1]}}),g.push(n)),y.push(n)),u&&(!c&&s.shape.length===1&&(s=xe({inputs:{x:s},backend:o,attrs:{shape:[s.shape[0],1,1]}}),g.push(s)),y.push(s)),i==="leakyrelu"&&(h.push({type:"float32",data:[a]}),f.uniforms+=" alpha : f32,");let b=o.runWebGPUProgram(f,y,r.dtype,h);for(let C of g)o.disposeData(C.dataId);return b}function roe(r){let{inputs:e,attrs:t,backend:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dataFormat:p,dilations:u,dimRoundingMode:c}=t,l=I.convertConv2DDataFormat(p),m=I.computeConv2DInfo(n.shape,s.shape,a,u,i,c,!1,l);return sx({x:n,filter:s,convInfo:m,backend:o})}var WL={kernelName:ln,backendName:"webgpu",kernelFunc:roe};function ooe(r=4){let e=s=>{switch(s){case 1:return"return W[getIndexFromCoords4D(coord, uniforms.wShape)];";case 4:return`
let coord1 = vec4<i32>(coordX, coordY, col + 1, rowInner);
let coord2 = vec4<i32>(coordX, coordY, col + 2, rowInner);
let coord3 = vec4<i32>(coordX, coordY, col + 3, rowInner);
let v0 = W[getIndexFromCoords4D(coord, uniforms.wShape)];
let v1 = W[getIndexFromCoords4D(coord1, uniforms.wShape)];
let v2 = W[getIndexFromCoords4D(coord2, uniforms.wShape)];
let v3 = W[getIndexFromCoords4D(coord3, uniforms.wShape)];
return vec4<f32>(v0, v1, v2, v3);
`;default:throw new Error(`innerElementSize ${s} is not supported.`)}},o=`if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
${`
let outRow = row / uniforms.outShape[2];
let outCol = row % uniforms.outShape[2];
let WRow = col / (uniforms.filterDims[1] * uniforms.outBackprop[3]);
let WCol = col / uniforms.outBackprop[3] % uniforms.filterDims[1];
let xR = f32(outRow - uniforms.pads[0] + WRow) / f32(uniforms.stride[0]);
let xC = f32(outCol - uniforms.pads[1] + WCol) / f32(uniforms.stride[1]);
if (xR < 0.0 || xR >= f32(uniforms.outBackprop[1]) || fract(xR) > 0.0) {
return ${vt(r)}(0.0);
}
if (xC < 0.0 || xC >= f32(uniforms.outBackprop[2]) || fract(xC) > 0.0) {
return ${vt(r)}(0.0);
}
let coord = vec4<i32>(
batch,
i32(xR),
i32(xC),
col % uniforms.outBackprop[3]);
return x[getIndexFromCoords4D(coord, uniforms.xShape)/${r}];`}
}
return ${vt(r)}(0.0);`;return`
fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${vt(r)} {
let col = colIn * ${r};
${o}
}
fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${vt(r)} {
let col = colIn * ${r};
let coordX = uniforms.filterDims.x - 1 -
row / (uniforms.filterDims[1] * uniforms.outBackprop[3]);
let coordY = uniforms.filterDims.y - 1 -
(row / uniforms.outBackprop[3]) % uniforms.filterDims[1];
if (row < uniforms.dimInner && col < uniforms.dimBOuter &&
coordX >= 0 && coordY >= 0) {
let rowInner = row % uniforms.outBackprop[3];
let coord = vec4<i32>(coordX, coordY, col, rowInner);
${e(r)}
}
return ${vt(r)}(0.0);
}
fn mm_write(batch: i32, row : i32, colIn : i32, valueInput : ${vt(r)}) {
let col = colIn * ${r};
if (row < uniforms.dimAOuter && (col + ${r-1}) < uniforms.dimBOuter) {
var value = valueInput;
let outCoord = vec4<i32>(
batch,
row / uniforms.outShape[2],
row % uniforms.outShape[2],
col);
result[getIndexFromCoords4D(outCoord, uniforms.outShape)/${r}] = value;
}
}`}var ax=class{constructor(e){this.variableNames=["x","W"],this.uniforms="filterDims : vec2<i32>, pads : vec2<i32>, stride : vec2<i32>, outBackprop : vec4<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=e.inShape,x.assert(e.dataFormat==="channelsLast",()=>"TODO: NCHW is unimplemented"),this.isVec4=e.inChannels%4===0&&e.outChannels%4===0,this.dispatchLayout={x:[3],y:[1,2],z:[0]},this.workGroupSize=Hl(this.dispatchLayout,this.outputShape,this.isVec4),this.elementsPerThread=ql(this.dispatchLayout,this.outputShape,this.isVec4),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,this.elementsPerThread),this.isVec4&&(this.variableTypes=["vec4<f32>","f32"]),this.shaderKey=`conv2DDerInputMM_${this.isVec4}_${this.elementsPerThread}`}getUserCode(){let e=this.isVec4?Uu(this.elementsPerThread,this.workGroupSize):Gu(this.elementsPerThread,this.workGroupSize);return`
${ooe(this.isVec4?4:1)}
${e}
`}};var ix=class{constructor(e){this.variableNames=["dy","W"],this.uniforms="filterDims : vec2<i32>, pads : vec2<i32>, stride : vec2<i32>, outBackprop : vec4<i32>,",this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e.inShape,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.isChannelsLast=e.dataFormat==="channelsLast",this.shaderKey=`conv2DDerInput_${this.isChannelsLast}`}getUserCode(){let e=this.isChannelsLast?1:2,t=this.isChannelsLast?2:3,o=this.isChannelsLast?3:1;return`
${ue("index")} {
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d1 = coords[${o}];
let dyCorner = vec2<i32>(coords[${e}], coords[${t}]) - uniforms.pads;
let dyRCorner = dyCorner.x;
let dyCCorner = dyCorner.y;
// Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
for (var wR = 0; wR < uniforms.filterDims.x; wR = wR + 1) {
let dyR = (f32(dyRCorner) + f32(wR)) / f32(uniforms.stride.x);
let wRPerm = uniforms.filterDims.x - 1 - wR;
if (dyR < 0.0 || dyR >= f32(uniforms.outBackprop[1]) || fract(dyR) > 0.0 ||
wRPerm < 0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims.y; wC = wC + 1) {
let dyC = (f32(dyCCorner) + f32(wC)) / f32(uniforms.stride.y);
let wCPerm = uniforms.filterDims.y - 1 - wC;
if (dyC < 0.0 || dyC >= f32(uniforms.outBackprop[2]) ||
fract(dyC) > 0.0 || wCPerm < 0) {
continue;
}
let idyC = i32(dyC);
for (var d2 = 0; d2 < uniforms.outBackprop[3]; d2 = d2 + 1) {
if (${this.isChannelsLast}) {
let xValue = getDy(batch, idyR, idyC, d2);
let wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd = dotProd + xValue * wValue;
} else {
let xValue = getDy(batch, d2, idyR, idyC);
let wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd = dotProd + xValue * wValue;
}
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}};function noe(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,filter:s}=e,{inputShape:a,strides:i,pad:p,dataFormat:u,dimRoundingMode:c}=o,l=I.convertConv2DDataFormat(u),m=I.computeConv2DInfo(a,s.shape,i,1,p,c,!1,l),f=[{type:"int32",data:[m.filterHeight,m.filterWidth]},{type:"int32",data:[m.filterHeight-1-m.padInfo.top,m.filterWidth-1-m.padInfo.left]},{type:"int32",data:[m.strideHeight,m.strideWidth]},{type:"int32",data:[m.batchSize,m.outHeight,m.outWidth,m.outChannels]}],d;if(P().getBool("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE")||m.filterHeight<=2&&m.filterWidth<=2&&m.outChannels<=16&&m.inChannels===1)d=new ix(m);else{d=new ax(m);let h=m.inHeight*m.inWidth,g=m.inChannels,y=m.filterHeight*m.filterWidth*m.outChannels;f.push({type:"uint32",data:[h]},{type:"uint32",data:[g]},{type:"uint32",data:[y]})}return t.runWebGPUProgram(d,[n,s],"float32",f)}var UL={kernelName:mn,backendName:"webgpu",kernelFunc:noe};var soe=Ge({opType:pe.COS}),GL={kernelName:fn,backendName:"webgpu",kernelFunc:soe};var aoe=Ge({opType:pe.COSH}),HL={kernelName:dn,backendName:"webgpu",kernelFunc:aoe};var ux=class{constructor(e,t,o,n){this.variableNames=["Image","Boxes","BoxInd"],this.uniforms="extrapolationValue : f32,",this.workGroupSize=[64,1,1],this.size=!0;let[s]=t;this.outputShape=[s,o[0],o[1],e],this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.methodId=n==="bilinear"?1:0,this.cropHeightBiggerThan1=this.outputShape[1]>1,this.cropWidthBiggerThan1=this.outputShape[2]>1,this.shaderKey=`cropAndResize_${this.methodId}_${this.cropHeightBiggerThan1}_${this.cropWidthBiggerThan1}`}getUserCode(){let[e,t]=["f32(uniforms.imageShape[1] - 1)","f32(uniforms.imageShape[2] - 1)"],[o,n,s]=this.cropHeightBiggerThan1?[`(${e} / f32(uniforms.outShape[1] - 1))`,"(y2-y1) * height_ratio",`y1*${e} + f32(y)*(height_scale)`]:["0.0","0.0",`0.5 * (y1+y2) * ${e}`],[a,i,p]=this.cropWidthBiggerThan1?[`(${t} / f32(uniforms.outShape[2] - 1))`,"(x2-x1) * 
width_ratio",`x1*${t} + f32(x)*(width_scale)`]:["0.0","0.0",`0.5 * (x1+x2) * ${t}`];return`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let height_ratio = f32(${o});
let width_ratio = f32(${a});
let b = coords[0];
let y = coords[1];
let x = coords[2];
let d = coords[3];
// get box vals
let y1 = getBoxes(b, 0);
let x1 = getBoxes(b, 1);
let y2 = getBoxes(b, 2);
let x2 = getBoxes(b, 3);
// get image in batch index
// bInd selects an image, so it is validated against the image batch size
// (imageShape[0]) rather than the number of boxes (outShape[0]).
let bInd = i32(round(getBoxInd(b)));
if(bInd < 0 || bInd >= uniforms.imageShape[0]) {
return;
}
let height_scale = ${n};
let width_scale = ${i};
let in_y = ${s};
// Samples that fall outside the image are filled with the extrapolation value.
if( in_y < 0.0 || in_y > ${e} ) {
setOutputAtIndex(index, uniforms.extrapolationValue);
return;
}
let in_x = ${p};
if( in_x < 0.0 || in_x > ${t} ) {
setOutputAtIndex(index, uniforms.extrapolationValue);
return;
}
let sourceFracIndexCR = vec2<f32>(in_x,in_y);
if(${this.methodId} == 1) {
// Compute the four integer indices.
let sourceFloorCR = vec2<i32>(sourceFracIndexCR);
let sourceCeilCR = vec2<i32>(ceil(sourceFracIndexCR));
let topLeft = getImage(bInd, sourceFloorCR.y, sourceFloorCR.x, d);
let bottomLeft = getImage(bInd, sourceCeilCR.y, sourceFloorCR.x, d);
let topRight = getImage(bInd, sourceFloorCR.y, sourceCeilCR.x, d);
let bottomRight = getImage(bInd, sourceCeilCR.y, sourceCeilCR.x, d);
let fracCR = sourceFracIndexCR - vec2<f32>(sourceFloorCR);
// Interpolate along x on the top and bottom rows, then along y.
let top = topLeft + (topRight - topLeft) * fracCR.x;
let bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR.x;
let newValue = top + (bottom - top) * fracCR.y;
setOutputAtIndex(index, newValue);
} else {
// Compute the coordinates of the nearest neighbor point.
let sourceNearestCR = vec2<i32>(floor(
sourceFracIndexCR + vec2<f32>(0.5,0.5)));
let newValue = getImage(
bInd, sourceNearestCR.y, sourceNearestCR.x, d);
setOutputAtIndex(index, newValue);
}
}
}
`}};var ioe=r=>{let{inputs:e,backend:t,attrs:o}=r,{image:n,boxes:s,boxInd:a}=e,{cropSize:i,method:p,extrapolationValue:u}=o,c=new ux(n.shape[3],s.shape,i,p),l=[{type:"float32",data:[u]}];return t.runWebGPUProgram(c,[n,s,a],"float32",l)},qL={kernelName:xn,backendName:"webgpu",kernelFunc:ioe};var Ku;(function(r){r.Prod="*",r.Sum="+"})(Ku||(Ku={}));var tm=class{constructor(e,t,o,n){this.variableNames=["x"],this.uniforms="index : f32,",this.size=!0;let s=128;this.workGroupSize=[s,1,1],this.outputShape=t,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.exclusive=o,this.reverse=n,this.op=e,this.shaderKey=`cum_${this.op}_${this.exclusive}_${this.reverse}`}getUserCode(){let e=this.outputShape.length,t=this.op===Ku.Prod?"1.0":"0.0",o=this.exclusive?t:`getX(${KL(e,"coords",this.op)})`,n=this.outputShape[this.outputShape.length-1],s="",a="";return this.exclusive?(s=this.reverse?`end != ${n-1}`:"end != 0",a=this.reverse?"end + 1":"end - 1"):(s=this.reverse?`end + pow2 < ${n}`:"end >= pow2",a=this.reverse?"end + pow2":"end - pow2"),`
${ue("index")} {
if (index < uniforms.size) {
var coords = getCoordsFromIndex(index);
let end = ${jL(e,"coords",this.op)};
var val = ${o};
let pow2 = i32(pow(2.0, uniforms.index));
if (${s}) {
let idx = ${a};
${jL(e,"coords",this.op)} = idx;
val ${this.op}= getX(${KL(e,"coords",this.op)});
}
setOutputAtIndex(index, val);
}
}
`}};function KL(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.x, ${e}.y`;if(r===3)return`${e}.x, ${e}.y, ${e}.z`;if(r===4)return`${e}.x, ${e}.y, ${e}.z, ${e}.w`;throw Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function jL(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.y`;if(r===3)return`${e}.z`;if(r===4)return`${e}.w`;throw Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function px(r,e,t,o,n,s){let a=e.shape.length,i=I.getAxesPermutation([o],a),p=e;i!=null&&(p=Nr({inputs:{x:e},backend:t,attrs:{perm:i}}));let u=I.getInnerMostAxes(1,a)[0];if(u!==a-1)throw new Error(`WebGPU cumprod shader expects an inner-most axis=${e.shape.length-1} but got axis=${o}`);let c=p.shape[u],l=Lt({inputs:{x:p},backend:t});for(let m=0;m<=Math.ceil(Math.log2(c))-1;m++){let f=new tm(r,p.shape,!1,s),d=l,h=[{type:"float32",data:[m]}];l=t.runWebGPUProgram(f,[l],l.dtype,h),t.disposeData(d.dataId)}if(n){let m=new tm(r,p.shape,n,s),f=l,d=[{type:"float32",data:[0]}];l=t.runWebGPUProgram(m,[l],l.dtype,d),t.disposeData(f.dataId)}if(i!=null){let m=I.getUndoAxesPermutation(i),f=Nr({inputs:{x:l},backend:t,attrs:{perm:m}});return t.disposeData(l.dataId),t.disposeData(p.dataId),f}return l}function uoe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return px(Ku.Prod,n,t,s,a,i)}var XL={kernelName:hn,backendName:"webgpu",kernelFunc:uoe};function poe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return px(Ku.Sum,n,t,s,a,i)}var YL={kernelName:gn,backendName:"webgpu",kernelFunc:poe};var cx=class{constructor(e,t){this.variableNames=["x"],this.workGroupSize=[64,1,1],this.size=!0,this.uniforms="blockSize : i32,",this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey=`depthToSpace_${t}`,this.dataFormat=t}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let b = coords[0];
let h = ${this.getHeightCoordString()};
let w = ${this.getWidthCoordString()};
let d = ${this.getDepthCoordString()};
let in_h = h / uniforms.blockSize;
let offset_h = h % uniforms.blockSize;
let in_w = w / uniforms.blockSize;
let offset_w = w % uniforms.blockSize;
let offset_d = (offset_h * uniforms.blockSize + offset_w) *
${this.getOutputDepthSize()};
let in_d = d + offset_d;
let rlt = ${this.getInputSamplingString()};
setOutputAtIndex(index, rlt);
}
}`}getHeightCoordString(){return this.dataFormat==="NHWC"?"coords[1]":"coords[2]"}getWidthCoordString(){return this.dataFormat==="NHWC"?"coords[2]":"coords[3]"}getDepthCoordString(){return this.dataFormat==="NHWC"?"coords[3]":"coords[1]"}getOutputDepthSize(){return this.dataFormat==="NHWC"?"uniforms.outShape[3]":"uniforms.outShape[1]"}getInputSamplingString(){return this.dataFormat==="NHWC"?"getX(b, in_h, in_w, in_d)":"getX(b, in_d, in_h, in_w)"}};function coe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockSize:s,dataFormat:a}=o,i=n.shape[0],p=a==="NHWC"?n.shape[1]:n.shape[2],u=a==="NHWC"?n.shape[2]:n.shape[3],c=a==="NHWC"?n.shape[3]:n.shape[1],l=p*s,m=u*s,f=c/(s*s),d=a==="NHWC"?[i,l,m,f]:[i,f,l,m],h=[{type:"int32",data:[s]}],g=new cx(d,a);return t.runWebGPUProgram(g,[n],n.dtype,h)}var QL={kernelName:yn,backendName:"webgpu",kernelFunc:coe};var lx=class{constructor(e,t,o,n=!1,s=null,a=!1){this.variableNames=["x","W"],this.uniforms="pad : vec2<i32>, inDims : vec2<i32>,",this.workGroupSize=[16,16,1],this.outputShape=e,this.dispatchLayout={x:[3],y:[2],z:[0,1]},this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),n&&this.variableNames.push("bias"),a&&this.variableNames.push("preluActivationWeights"),this.addBias=n,this.activation=s,this.hasPreluActivation=a,this.filterHeight=t,this.filterWidth=o,this.shaderKey=`depthwiseNCHW_${this.activation}_${this.filterHeight}_${this.filterWidth}`}getUserCode(){let e=this.filterWidth*this.filterHeight,t=this.workGroupSize[0]*this.workGroupSize[1]*this.workGroupSize[2],o=this.workGroupSize[1]+this.filterHeight-1,n=this.workGroupSize[0]+this.filterWidth-1;return`
${ur(this.activation,this.hasPreluActivation,!1,4)}
var<workgroup> mm_Asub : array<array<f32, ${n}>, ${o}>;
var<workgroup> mm_Bsub : array<array<f32, ${this.filterWidth}>, ${this.filterHeight}>;
fn readX(batch : i32, channel : i32, row : i32, col : i32) -> f32 {
var value = 0.0;
if (row >=0 && row < uniforms.inDims[0] && col >=0 && col < uniforms.inDims[1])
{
value = getX(batch, channel, row, col);
}
return value;
}
${Ri()}
fn _start(@builtin(local_invocation_id) LocalId : vec3<u32>,
@builtin(global_invocation_id) GlobalId : vec3<u32>,
@builtin(local_invocation_index) LocalIndex: u32,
@builtin(num_workgroups) NumWorkgroups: vec3<u32>) {
localId = LocalId;
globalId = GlobalId;
let localIndex = i32(LocalIndex);
numWorkgroups = NumWorkgroups;
let coords = getOutputCoords();
let batch = coords[0];
let xRCCorner = vec2<i32>(coords.zw) - uniforms.pad;
let channelMul = uniforms.wShape[3];
let d1 = coords[1] / channelMul;
let q = coords[1] % channelMul;
let inputRowStart = xRCCorner.x;
let inputColStart = xRCCorner.y;
let localRow = i32(localId.y);
let localCol = i32(localId.x);
// Load one tile of X into local memory.
for (var inputRow = localRow; inputRow < ${o}; inputRow = inputRow + ${this.workGroupSize[1]}) {
for (var inputCol = localCol; inputCol < ${n}; inputCol = inputCol + ${this.workGroupSize[0]}) {
let rowOffset = inputRow - localRow;
let colOffset = inputCol - localCol;
mm_Asub[inputRow][inputCol] = readX(batch, d1, inputRowStart + rowOffset, inputColStart + colOffset);
}
}
// Load one tile of W into local memory.
var wIndex = localIndex;
${e<t?`if (wIndex < ${e})`:`for(; wIndex < ${e}; wIndex = wIndex + ${t})`}
{
let wRow = wIndex / ${this.filterWidth};
let wCol = wIndex % ${this.filterWidth};
mm_Bsub[wRow][wCol] = getW(wRow, wCol, d1, q);
}
workgroupBarrier();
var value = 0.0;
for (var wR = 0; wR < ${this.filterHeight}; wR = wR + 1) {
for (var wC = 0; wC < ${this.filterWidth}; wC = wC + 1) {
let xVal = mm_Asub[localRow + wR][localCol + wC];
let wVal = mm_Bsub[wR][wC];
value = fma(xVal, wVal, value);
}
}
${Kr(this.addBias,this.activation)}
if (coordsInBounds4D(coords, uniforms.outShape)) {
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
`}};var Dc=class{constructor(e,t=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms="pad : vec2<i32>, inDims : vec2<i32>,",this.workGroupSize=[4,4,4],this.workPerThread=4,this.isVec4=!0,this.outputShape=e.outShape,this.dispatchLayout={x:[3],y:[2],z:[0,1]},this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[4,this.workPerThread,1]),x.assert(e.dataFormat==="channelsLast",()=>"TODO: NCHW is unimplemented"),t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.convInfo=e,this.addBias=t,this.activation=o,this.hasPreluActivation=n,this.shaderKey=`depthwiseVec4_${o}_${this.convInfo.filterHeight}_${this.convInfo.filterWidth}_${this.convInfo.strideHeight}_${this.convInfo.strideWidth}_${this.workPerThread}`}getUserCode(){let e=(this.workPerThread-1)*this.convInfo.strideWidth+this.convInfo.filterWidth;return`
${ur(this.activation,this.hasPreluActivation,!0,4)}
fn readX(batch : i32, row : i32, col : i32, channel : i32) -> vec4<f32> {
var value = vec4<f32>(0.0);
if (col >=0 && col < uniforms.inDims[1]) {
value = getX(batch, row, col, channel);
}
return value;
}
const strideHeight = ${this.convInfo.strideHeight};
const strideWidth = ${this.convInfo.strideWidth};
${Ri()}
fn _start(@builtin(global_invocation_id) globalId: vec3<u32>) {
let batch = i32(globalId.z) / uniforms.outShape[1];
let r = i32(globalId.z) % uniforms.outShape[1];
let c = i32(globalId.y) * ${this.workPerThread};
let d1 = i32(globalId.x) * 4;
let xRCCorner = vec2<i32>(r, c) * vec2<i32>(strideHeight, strideWidth) - uniforms.pad;
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
var xVals : array<vec4<f32>, ${e}>;
var dotProd : array<vec4<f32>, ${this.workPerThread}>;
for (var i = 0; i < ${this.workPerThread}; i++) {
dotProd[i] = vec4<f32>(0.0);
}
// Use constant instead of uniform can give better performance.
for (var wR = 0; wR < ${this.convInfo.filterHeight}; wR = wR + 1) {
let xR = xRCorner + wR;
if (xR >=0 && xR < uniforms.inDims[0]) {
for (var i = 0; i < ${e}; i++) {
xVals[i] = readX(batch, xR, xCCorner + i, d1);
}
for (var wC = 0; wC < ${this.convInfo.filterWidth}; wC = wC + 1) {
let wValue = getW(wR, wC, d1, 0);
for (var i = 0; i < ${this.workPerThread}; i++) {
dotProd[i] = fma(xVals[i * strideWidth + wC], wValue, dotProd[i]);
}
}
}
}
for (var i = 0; i < ${this.workPerThread}; i = i + 1) {
let coords = vec4<i32>(batch, r, c + i, d1);
if (coordsInBounds4D(coords, uniforms.outShape)) {
var value = dotProd[i];
${Kr(this.addBias,this.activation)}
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
}
`}};var Pc=class{constructor(e,t=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms=`pad : vec2<i32>, inDims : vec2<i32>, filterHeight : i32,
filterWidth : i32, stride : vec2<i32>, dilation : vec2<i32>,`,this.workGroupSize=[256,1,1],this.outputShape=e.outShape,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.isChannelsLast=e.dataFormat==="channelsLast",t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.convInfo=e,this.addBias=t,this.activation=o,this.hasPreluActivation=n,this.shaderKey=`depthwise_${this.activation}_${this.isChannelsLast}`}getUserCode(){let e=this.isChannelsLast?"getX(batch, xR, xC, d1);":"getX(batch, d1, xR, xC);";return`
${ur(this.activation,this.hasPreluActivation,!1,4)}
${ue()} {
let coords = getOutputCoords();
let batch = coords[0];
let xRCCorner = vec2<i32>(coords.${this.isChannelsLast?"yz":"zw"}) * uniforms.stride - uniforms.pad;
let d2 = coords[${this.isChannelsLast?3:1}];
let channelMul = uniforms.wShape[3];
let d1 = d2 / channelMul;
let q = d2 % channelMul;
let inputRowStart = xRCCorner.x;
let inputColStart = xRCCorner.y;
let inputRowEnd = inputRowStart + uniforms.filterHeight *
uniforms.dilation[0];
let inputColEnd = inputColStart + uniforms.filterWidth *
uniforms.dilation[1];
// Convolve x(?, ?, d1)|x(d1, ?, ?) with w(:, :, d1, q) to get
// y(yR, yC, d2)|y(d2, yR, yC). ? = to be determined. : = across all
// values in that axis. x(?, ?, d1) and y(yR, yC, d2) is for NHWC.
// x(d1, ?, ?) and y(d2, yR, yC) is for NCHW.
var value = 0.0;
// Extract if checking out of for loop for performance.
if (inputRowStart >= 0 && inputColStart >= 0 &&
inputRowEnd < uniforms.inDims[0] &&
inputColEnd < uniforms.inDims[1]) {
for (var wR = 0; wR < uniforms.filterHeight; wR = wR + 1) {
let xR = inputRowStart + wR * uniforms.dilation[0];
for (var wC = 0; wC < uniforms.filterWidth; wC = wC + 1) {
let xC = inputColStart + wC * uniforms.dilation[1];
let xVal = ${e};
let wVal = getW(wR, wC, d1, q);
value = value + xVal * wVal;
}
}
} else {
for (var wR = 0; wR < uniforms.filterHeight; wR = wR + 1) {
let xR = inputRowStart + wR * uniforms.dilation[0];
if (xR < 0 || xR >= uniforms.inDims[0]) {
continue;
}
for (var wC = 0; wC < uniforms.filterWidth; wC = wC + 1) {
let xC = inputColStart + wC * uniforms.dilation[1];
if (xC < 0 || xC >= uniforms.inDims[1]) {
continue;
}
let xVal = ${e};
let wVal = getW(wR, wC, d1, q);
value = value + xVal * wVal;
}
}
}
${Kr(this.addBias,this.activation)}
if (coordsInBounds4D(coords, uniforms.outShape)) {
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
`}};function loe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dataFormat:p,dilations:u,dimRoundingMode:c}=o,l=I.convertConv2DDataFormat(p),m=u;m==null&&(m=[1,1]);let f=I.computeConv2DInfo(n.shape,s.shape,a,m,i,c,!0,l),d=[{type:"int32",data:[f.padInfo.top,f.padInfo.left]},{type:"int32",data:[f.inHeight,f.inWidth]}],h=f.dataFormat==="channelsLast",g;return!h&&f.inHeight>16&&f.inWidth>16&&f.strideHeight===1&&f.strideWidth===1&&f.dilationWidth===1&&f.dilationHeight===1&&f.inChannels===f.outChannels?g=new lx(f.outShape,f.filterHeight,f.filterWidth):h&&f.inHeight>4&&f.inWidth>4&&f.strideWidth<=2&&f.inChannels===f.outChannels&&f.dilationHeight===1&&f.dilationWidth===1&&f.inChannels%4===0?g=new Dc(f):(g=new Pc(f),d.push({type:"int32",data:[f.filterHeight]},{type:"int32",data:[f.filterWidth]},{type:"int32",data:[f.strideHeight,f.strideWidth]},{type:"int32",data:[f.dilationHeight,f.dilationWidth]})),t.runWebGPUProgram(g,[n,s],n.dtype,d)}var ZL={kernelName:bn,backendName:"webgpu",kernelFunc:loe};var BS=it({opType:ye.MUL,cpuKernelImpl:sL,supportsComplex:!0}),JL={kernelName:ho,backendName:"webgpu",kernelFunc:BS};function rm(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return Ys(n,s,a,"sum",t)}var eB={kernelName:jn,backendName:"webgpu",kernelFunc:rm};function moe(r){let{inputs:e,backend:t,attrs:o}=r,{equation:n}=o,s=e,{allDims:a,summedDims:i,idDims:p}=I.decodeEinsumEquation(n,s.length);I.checkEinsumDimSizes(a.length,p,s);let{path:u,steps:c}=I.getEinsumComputePath(i,p),l=c.length,m=null,f=a.length,d=[];for(let h=0;h<l;++h){for(let g of c[h]){let{permutationIndices:y,expandDims:b}=I.getEinsumPermutation(f,p[g]),C;I.isIdentityPermutation(y)?C=s[g]:(C=Nr({inputs:{x:s[g]},backend:t,attrs:{perm:y}}),d.push(C));let w=C.shape.slice();for(let 
k=0;k<b.length;++k)w.splice(b[k],0,1);x.arraysEqual(C.shape,w)||(C=xe({inputs:{x:C},backend:t,attrs:{shape:w}}),d.push(C)),m===null?m=C:(m=BS({inputs:{a:C,b:m},backend:t}),d.push(m))}h<l-1&&(u[h]>=0&&(m=rm({inputs:{x:m},backend:t,attrs:{axis:u[h]-(a.length-f),keepDims:!1}}),d.push(m)),f--)}for(let h of d)h!==m&&t.disposeData(h.dataId);return m}var tB={kernelName:Xa,backendName:"webgpu",kernelFunc:moe};var foe=Ge({opType:pe.ELU}),rB={kernelName:In,backendName:"webgpu",kernelFunc:foe};var doe=it({opType:ye.EQUAL,dtype:"bool",cpuKernelImpl:HM}),oB={kernelName:oo,backendName:"webgpu",kernelFunc:doe};var VS=Ge({opType:pe.EXP,cpuKernelImpl:qM,dtype:"float32"}),nB={kernelName:no,backendName:"webgpu",kernelFunc:VS};function mx(r){let{inputs:e,attrs:t,backend:o}=r,{dim:n}=t,{input:s}=e,a=s.shape.length,i=s.shape.slice(),p=n;return n<0&&(x.assert(-(a+1)<=n,()=>`Axis must be in the interval [${-(a+1)}, ${a}]`),p=a+n+1),i.splice(p,0,1),xe({inputs:{x:s},backend:o,attrs:{shape:i}})}var sB={kernelName:xs,backendName:"webgpu",kernelFunc:mx};var hoe=Ge({opType:pe.EXPM1,cpuKernelImpl:KM}),aB={kernelName:wn,backendName:"webgpu",kernelFunc:hoe};var fx=class{constructor(e){this.outputShape=[],this.variableNames=["x"],this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey="flipLeftRight"}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let coordX = uniforms.xShape[2] - coords[2] - 1;
let outputValue = getX(coords[0], coords[1], coordX, coords[3]);
setOutputAtIndex(index, outputValue);
}
}
`}};var iB={kernelName:Sn,backendName:"webgpu",kernelFunc:({inputs:r,backend:e})=>{let{image:t}=r,o=e,n=new fx(t.shape);return o.runWebGPUProgram(n,[t],t.dtype)}};var goe=Ge({opType:pe.FLOOR,cpuKernelImpl:jM}),uB={kernelName:so,backendName:"webgpu",kernelFunc:goe};var xoe=it({opType:ye.INT_DIV,dtype:"int32"}),pB={kernelName:vn,backendName:"webgpu",kernelFunc:xoe};var dx=class{constructor(e,t,o=!1){this.isFromPixels=!0,this.outputShape=[0],this.variableNames=[],this.workGroupSize=[256,1,1],this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[t,1,1]),this.importVideo=o,this.shaderKey=`fromPixels_${this.importVideo}`}getUserCode(){let e=this.importVideo?"textureLoad(src, vec2<i32>(coords.yx));":"textureLoad(src, vec2<i32>(coords.yx), 0)";return`
@binding(1) @group(0) var src: ${this.importVideo?"texture_external":"texture_2d<f32>"};
${ue("index")} {
let flatIndex = index * uniforms.numChannels;
if (flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
let values = ${e};
for (var i = 0; i < uniforms.numChannels; i = i + 1) {
result[flatIndex + i] = i32(floor(255.0 * values[i]));
}
}
}
`}};var cB={kernelName:Zi,backendName:"webgpu",kernelFunc:yoe},Oc,zS=P().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU"),hx=new Map;function yoe(r){let{inputs:e,backend:t,attrs:o}=r,{pixels:n}=e,{numChannels:s}=o;if(n==null)throw new Error("pixels passed to tf.browser.fromPixels() can not be null");let a=typeof HTMLVideoElement!="undefined"&&n instanceof HTMLVideoElement,i=typeof HTMLImageElement!="undefined"&&n instanceof HTMLImageElement,p=typeof HTMLCanvasElement!="undefined"&&n instanceof HTMLCanvasElement||typeof OffscreenCanvas!="undefined"&&n instanceof OffscreenCanvas,u=typeof ImageBitmap!="undefined"&&n instanceof ImageBitmap,[c,l]=a?[n.videoWidth,n.videoHeight]:[n.width,n.height],m=[l,c,s],f=!1,d=a||i;if(u||p||d){let b;if(f){let D=n;if(!hx.has(D)||hx.get(D).expired){let O={source:D};hx.set(D,t.device.importExternalTexture(O))}b={width:c,height:l,format:null,usage:null,texture:hx.get(D)}}else{if(d){let L=P().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");(Oc==null||L!==zS)&&(zS=L,Oc=document.createElement("canvas").getContext("2d",{willReadFrequently:zS})),Oc.canvas.width=c,Oc.canvas.height=l,Oc.drawImage(n,0,0,c,l),n=Oc.canvas}let D=GPUTextureUsage.COPY_DST|GPUTextureUsage.RENDER_ATTACHMENT|GPUTextureUsage.TEXTURE_BINDING,O="rgba8unorm",M=t.textureManager.acquireTexture(m[1],m[0],O,D);t.queue.copyExternalImageToTexture({source:n},{texture:M},[m[1],m[0]]),b={width:c,height:l,format:O,usage:D,texture:M}}let C=x.sizeFromShape(m),w=x.computeStrides(m),k=new dx(m,s,f),_=[{type:"uint32",data:[C]},{type:"uint32",data:[s]},{type:"uint32",data:[...w]}],E=t.makeTensorInfo([l,c],"int32"),R=t.tensorMap.get(E.dataId);R.resourceInfo=b;let A=t.runWebGPUProgram(k,[E],"int32",_);return t.disposeData(E.dataId),A}let h=n.data,g=h;if(s!=null&&s!==4){g=new Uint8Array(n.width*n.height*s);let b=h.length,C=0;for(let w=0;w<b;w++)w%4<s&&(g[C++]=h[w])}let y=t.makeTensorInfo(m,"int32",new Int32Array(g));return t.uploadToGPU(y.dataId),y}var 
gx=class{constructor(e,t,o,n,s){this.uniforms="varianceEpsilon : f32,",this.workGroupSize=[128,1,1],this.size=!0,this.variableNames=["x","mean","variance"],I.assertAndGetBroadcastShape(e,t),I.assertAndGetBroadcastShape(e,o),this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),n!=null&&(I.assertAndGetBroadcastShape(e,n),this.variableNames.push("offset")),s!=null&&(I.assertAndGetBroadcastShape(e,s),this.variableNames.push("scale")),this.offsetShape=n,this.scaleShape=s,this.shaderKey="batchNorm"}getUserCode(){let e="0.0";this.offsetShape!=null&&(e="getOffsetByOutputIndex(index)");let t="1.0";return this.scaleShape!=null&&(t="getScaleByOutputIndex(index)"),`
${ue("index")} {
if (index < uniforms.size)
{
let xValue = getXByOutputIndex(index);
let meanValue = getMeanByOutputIndex(index);
let varianValue = getVarianceByOutputIndex(index);
let offsetValue = ${e};
let scaleValue = ${t};
let inv = scaleValue * inverseSqrt(varianValue + f32(uniforms.varianceEpsilon));
setOutputAtIndex(index,dot(vec3<f32>(xValue, -meanValue, offsetValue), vec3<f32>(inv, inv, 1.0)));
}
}
`}};var lB={kernelName:kn,backendName:"webgpu",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o,scale:n,offset:s,mean:a,variance:i}=r,{varianceEpsilon:p}=e,u=t,c=[o,a,i],l=null;s!=null&&(l=s.shape,c.push(s));let m=null;n!=null&&(m=n.shape,c.push(n));let f=new gx(o.shape,a.shape,i.shape,l,m),d=[{type:"float32",data:[p]}];return u.runWebGPUProgram(f,c,o.dtype,d)}};function boe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dataFormat:c,dilations:l,dimRoundingMode:m,activation:f,leakyreluAlpha:d}=o,h=I.convertConv2DDataFormat(c),g=I.computeConv2DInfo(n.shape,s.shape,p,l,u,m,!1,h);return sx({x:n,filter:s,convInfo:g,backend:t,bias:a,preluActivationWeights:i,leakyreluAlpha:d,activation:f})}var mB={kernelName:Do,backendName:"webgpu",kernelFunc:boe};function Coe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dilations:c,dimRoundingMode:l,activation:m,leakyreluAlpha:f}=o,d=c;d==null&&(d=[1,1]),x.assert(I.eitherStridesOrDilationsAreOne(p,d),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. 
Got strides ${p} and dilations '${d}'`);let h=I.computeConv2DInfo(n.shape,s.shape,p,d,u,l,!0),g=[n,s],y=a!=null,b=i!=null;y&&g.push(a),b&&g.push(i);let C=[{type:"int32",data:[h.padInfo.top,h.padInfo.left]},{type:"int32",data:[h.inHeight,h.inWidth]}],w;return h.inHeight>4&&h.inWidth>4&&h.strideWidth<=2&&h.inChannels===h.outChannels&&h.dilationHeight===1&&h.dilationWidth===1&&h.inChannels%4===0?w=new Dc(h,y,m,b):(w=new Pc(h,y,m,b),C.push({type:"int32",data:[h.filterHeight]},{type:"int32",data:[h.filterWidth]},{type:"int32",data:[h.strideHeight,h.strideWidth]},{type:"int32",data:[h.dilationHeight,h.dilationWidth]})),m==="leakyrelu"&&(C.push({type:"float32",data:[f]}),w.uniforms+=" alpha : f32,"),t.runWebGPUProgram(w,g,"float32",C)}var fB={kernelName:Po,backendName:"webgpu",kernelFunc:Coe};var xx=class{constructor(e,t){this.variableNames=["A","indices"],this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey=`gathernd_${e}`,this.sliceDim=e,this.uniforms=`sliceDim : i32, strides : ${At(e)},`}getUserCode(){let e;return this.sliceDim>1?e="uniforms.strides[j]":e="uniforms.strides",`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
var flattenIndex = 0;
for (var j = 0; j < uniforms.sliceDim; j = j + 1) {
let indexTemp = i32(round(getIndices(coords[0], j)));
let strideNum = ${e};
flattenIndex = flattenIndex + indexTemp * strideNum;
}
setOutputAtIndex(index, getA(flattenIndex, coords[1]));
}
}
`}};function Ioe(r){let{inputs:e,backend:t}=r,{params:o,indices:n}=e,s=n.shape,a=s[s.length-1],i=x.sizeFromShape(o.shape),[p,u,c,l]=I.prepareAndValidate(o,n),m=xe({inputs:{x:n},backend:t,attrs:{shape:[u,a]}}),f=xe({inputs:{x:o},backend:t,attrs:{shape:[x.sizeFromShape(o.shape)/c,c]}});if(t.shouldExecuteOnCPU([o,n])||o.dtype==="string"){let b=t.readSync(n.dataId),C=t.bufferSync(o),w=XM(b,C,o.dtype,u,a,c,l,o.shape,i);return t.makeTensorInfo(p,o.dtype,w.values)}let d=new xx(a,[u,c]),h=[{type:"int32",data:[a]},{type:"int32",data:l}],g=t.runWebGPUProgram(d,[f,m],f.dtype,h),y=xe({inputs:{x:g},backend:t,attrs:{shape:p}});return t.disposeData(m.dataId),t.disposeData(f.dataId),t.disposeData(g.dataId),y}var dB={kernelName:Tn,backendName:"webgpu",kernelFunc:Ioe};var yx=class{constructor(e,t){this.variableNames=["A","indices"],this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e.slice(),this.aShape=e,this.outputShape=t,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey="gather"}getUserCode(){let e=woe(this.aShape);return`
${ue("index")} {
if (index < uniforms.size) {
let resRC = getCoordsFromIndex(index);
let indexZ = i32(getIndices(resRC.x, resRC.z));
let inBounds = select(0.0, 1.0, indexZ >= 0 && indexZ < uniforms.aShape[2]);
setOutputAtIndex(index, inBounds * getA(${e}));
}
}
`}};function woe(r){let e=["resRC.x","resRC.y","resRC.z","resRC.w"],t=[];for(let o=0;o<r.length;o++)o===2?t.push("indexZ"):t.push(`${e[o]}`);return t.join()}function WS(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,indices:s}=e,{axis:a,batchDims:i}=o,p=x.parseAxisParam(a,n.shape)[0],u=I.segment_util.collectGatherOpShapeInfo(n,s,p,i),c=x.sizeFromShape(s.shape),l=[],m=xe({inputs:{x:n},backend:t,attrs:{shape:[u.batchSize,u.outerSize,u.dimSize,u.sliceSize]}}),f=xe({inputs:{x:s},backend:t,attrs:{shape:[u.batchSize,c/u.batchSize]}});l.push(m),l.push(f);let d=[u.batchSize,u.outerSize,c/u.batchSize,u.sliceSize];if(t.shouldExecuteOnCPU([n,s])){let C=t.tensorMap.get(f.dataId).values,w=ne(f.shape,f.dtype,C),_=t.tensorMap.get(m.dataId).values,E=ne(m.shape,m.dtype,_),R=YM(E,w,d);return l.forEach(A=>t.disposeData(A.dataId)),t.makeTensorInfo(u.outputShape,R.dtype,R.values)}let h=new yx(m.shape,d),g=t.runWebGPUProgram(h,[m,f],m.dtype);l.push(g);let y=xe({inputs:{x:g},backend:t,attrs:{shape:u.outputShape}});return l.forEach(b=>t.disposeData(b.dataId)),y}var hB={kernelName:bs,backendName:"webgpu",kernelFunc:WS};var Soe=it({opType:ye.GREATER,cpuKernelImpl:ZM,dtype:"bool"}),gB={kernelName:ao,backendName:"webgpu",kernelFunc:Soe};var voe=it({opType:ye.GREATER_EQUAL,dtype:"bool",cpuKernelImpl:QM}),xB={kernelName:io,backendName:"webgpu",kernelFunc:voe};var koe=Ge({opType:pe.IS_NAN,dtype:"bool"}),yB={kernelName:ia,backendName:"webgpu",kernelFunc:koe};function Toe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{alpha:s}=o,a=[{type:"float32",data:[s]}],i=new Zo(n.shape,pe.LEAKYRELU);return i.uniforms="alpha : f32,",t.runWebGPUProgram(i,[n],"float32",a)}var bB={kernelName:Nn,backendName:"webgpu",kernelFunc:Toe};var Noe=it({opType:ye.LESS,dtype:"bool",cpuKernelImpl:eL}),CB={kernelName:po,backendName:"webgpu",kernelFunc:Noe};var _oe=it({opType:ye.LESS_EQUAL,dtype:"bool",cpuKernelImpl:JM}),IB={kernelName:co,backendName:"webgpu",kernelFunc:_oe};var 
Eoe=Ge({opType:pe.LOG,cpuKernelImpl:tL}),wB={kernelName:lo,backendName:"webgpu",kernelFunc:Eoe};var $oe=it({opType:ye.LOGICAL_AND,dtype:"bool"}),SB={kernelName:_n,backendName:"webgpu",kernelFunc:$oe};var Roe=Ge({opType:pe.LOGICAL_NOT}),vB={kernelName:En,backendName:"webgpu",kernelFunc:Roe};var Aoe=it({opType:ye.MAX,cpuKernelImpl:oL}),kB={kernelName:mo,backendName:"webgpu",kernelFunc:Aoe};function Foe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1,c=I.computePool2DInfo(n.shape,s,a,u,i,p);return Zg(n,c,"max",t)}var TB={kernelName:Rn,backendName:"webgpu",kernelFunc:Foe};function Doe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return Ys(n,s,a,"min",t)}var NB={kernelName:Fn,backendName:"webgpu",kernelFunc:Doe};var Poe=it({opType:ye.MIN,cpuKernelImpl:nL}),_B={kernelName:fo,backendName:"webgpu",kernelFunc:Poe};var bx=class{constructor(e,t,o){this.uniforms="",this.variableNames=["x"],this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=t.map((n,s)=>n[0]+e[s]+n[1]),this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.xShape=e,t.map((n,s)=>{this.uniforms+=` pad${s} : vec2<i32>,`}),this.offset=o==="reflect"?0:1,this.shaderKey=`mirrorPad_${o}`}getUserCode(){let e=this.xShape.length,t=this.xShape.map((u,c)=>`uniforms.pad${c}[0]`).join(","),o=this.xShape.map((u,c)=>`uniforms.pad${c}[0] + uniforms.xShape${e>1?`[${c}]`:""}`).join(","),n=e===1?"start":"start[i]",s=e===1?"end":"end[i]",a=e===1?"outC":"outC[i]",i=At(e),p=e>1?["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,e):"coords";return`
${ue("index")} {
if (index < uniforms.size) {
let start = ${i}(${t});
let end = ${i}(${o});
var outC = getCoordsFromIndex(index);
for (var i = 0; i < ${e}; i = i + 1) {
if (${a} < ${n}) {
${a} = ${n} * 2 - ${a} - ${this.offset};
} else if(${a} >= ${s}) {
${a} = (${s} - 1) * 2 - ${a} + ${this.offset};
}
}
let coords = outC - start;
setOutputAtIndex(index, getX(${p}));
}
}
`}};var EB={kernelName:Dn,backendName:"webgpu",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o}=r,{paddings:n,mode:s}=e,a=t,i=n.map(c=>({type:"int32",data:[c[0],c[1]]})),p=new bx(o.shape,n,s);return a.runWebGPUProgram(p,[o],o.dtype,i)}};function Ooe(r){let{inputs:e,backend:t}=r,{x:o}=e;if(t.shouldExecuteOnCPU([o])){let s=t.tensorMap.get(o.dataId),[a,i]=aL(s.values,o.shape,o.dtype);return t.makeTensorInfo(i,o.dtype,a)}let n=new Zo(o.shape,pe.NEG);return t.runWebGPUProgram(n,[o],o.dtype)}var $B={kernelName:Pn,backendName:"webgpu",kernelFunc:Ooe};function Moe(r){console.warn("tf.nonMaxSuppression() in webgpu locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");let{inputs:e,backend:t,attrs:o}=r,{boxes:n,scores:s}=e,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p}=o,u=t.readSync(n.dataId),c=t.readSync(s.dataId),{selectedIndices:l}=Bt.nonMaxSuppressionV3Impl(u,c,a,i,p);return t.makeTensorInfo([l.length],"int32",new Int32Array(l))}var RB={kernelName:On,backendName:"webgpu",kernelFunc:Moe};function Loe(r){console.warn("tf.nonMaxSuppression() in webgpu locks the UI thread. 
Call tf.nonMaxSuppressionAsync() instead");let{inputs:e,backend:t,attrs:o}=r,{boxes:n,scores:s}=e,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p,softNmsSigma:u}=o,c=t.readSync(n.dataId),l=t.readSync(s.dataId),m=a,f=i,d=p,h=u,{selectedIndices:g,selectedScores:y}=Bt.nonMaxSuppressionV5Impl(c,l,m,f,d,h);return[t.makeTensorInfo([g.length],"int32",new Int32Array(g)),t.makeTensorInfo([y.length],"float32",new Float32Array(y))]}var AB={kernelName:Mn,backendName:"webgpu",kernelFunc:Loe};function om(r){let{inputs:e,backend:t}=r,{x:o}=e;if(o.dtype==="complex64"){let n=Wa({inputs:{input:o},backend:t}),s=om({inputs:{x:n},backend:t}),a=qu({inputs:{input:o},backend:t}),i=om({inputs:{x:a},backend:t}),p=ls({inputs:{real:s,imag:i},backend:t});return t.disposeData(n.dataId),t.disposeData(s.dataId),t.disposeData(a.dataId),t.disposeData(i.dataId),p}else return $o({attrs:{shape:o.shape,dtype:o.dtype,value:o.dtype==="string"?"":0},backend:t})}var FB={kernelName:Es,backendName:"webgpu",kernelFunc:om};function DB(r){let{inputs:e,backend:t}=r,{x:o}=e;if(o.dtype==="string")throw new Error("onesLike is not supported under string dtype");if(o.dtype==="complex64"){let n=Wa({inputs:{input:o},backend:t}),s=DB({inputs:{x:n},backend:t}),a=qu({inputs:{input:o},backend:t}),i=om({inputs:{x:a},backend:t}),p=ls({inputs:{real:s,imag:i},backend:t});return t.disposeData(n.dataId),t.disposeData(s.dataId),t.disposeData(a.dataId),t.disposeData(i.dataId),p}else return $o({attrs:{shape:o.shape,dtype:o.dtype,value:1},backend:t})}var PB={kernelName:Cs,backendName:"webgpu",kernelFunc:DB};function Boe(r){let{inputs:e,backend:t,attrs:o}=r,{axis:n}=o;if(e.length===1)return mx({inputs:{input:e[0]},backend:t,attrs:{dim:n}});let s=e[0].shape,a=e[0].dtype;e.forEach(c=>{x.assertShapesMatch(s,c.shape,"All tensors passed to stack must have matching shapes"),x.assert(a===c.dtype,()=>"All tensors passed to stack must have matching dtypes")});let i=[],p=e.map(c=>{let l=mx({inputs:{input:c},backend:t,attrs:{dim:n}});return 
i.push(l),l}),u=LS({inputs:p,backend:t,attrs:{axis:n}});return i.forEach(c=>t.disposeData(c.dataId)),u}var OB={kernelName:Is,backendName:"webgpu",kernelFunc:Boe};var Cx=class{constructor(e,t){this.variableNames=["x"],this.uniforms="constantValue : f32,",this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=t.map((o,n)=>o[0]+e[n]+o[1]),this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),t.map((o,n)=>{this.uniforms+=` pad${n} : vec2<i32>,`}),this.xShape=e,this.shaderKey="pad"}getUserCode(){let e=this.xShape.length,t=At(e),o=this.xShape.map((l,m)=>`uniforms.pad${m}[0]`).join(","),n=this.xShape.map((l,m)=>`uniforms.pad${m}[0] + uniforms.xShape${e>1?`[${m}]`:""}`).join(","),s=e>1?`${t}(${o})`:`${o}`,a=e>1?`${t}(${n})`:`${n}`,i=e>1?"any(outC < start)":"outC < start",p=e>1?"any(outC >= end)":"outC >= end",u=e>1?["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,e):"coords";return`
${ue("index")} {
if (index < uniforms.size) {
let start = ${s};
let end = ${a};
let outC = getCoordsFromIndex(index);
if (${i} || ${p}) {
setOutputAtIndex(index, uniforms.constantValue);
} else {
let coords = outC - start;
setOutputAtIndex(index, getX(${u}));
}
}
}
`}};var US=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{paddings:s,constantValue:a}=o;if(s.every(u=>x.arraysEqual(u,[0,0])))return Lt({inputs:{x:n},backend:t});if(x.sizeFromShape(n.shape)===0){let u=s.map((c,l)=>c[0]+n.shape[l]+c[1]);return $o({backend:t,attrs:{shape:u,value:a,dtype:n.dtype}})}let i=[{type:"float32",data:[a]}];s.map(u=>i.push({type:"int32",data:[u[0],u[1]]}));let p=new Cx(n.shape,s);return t.runWebGPUProgram(p,[n],n.dtype,i)},MB={kernelName:Ln,backendName:"webgpu",kernelFunc:US};var Voe=it({opType:ye.POW}),LB={kernelName:Bn,backendName:"webgpu",kernelFunc:Voe};function zoe(r){let{inputs:e,backend:t}=r,{x:o,alpha:n}=e,s=new Hu(ye.PRELU,o.shape,n.shape);return t.runWebGPUProgram(s,[o,n],"float32")}var BB={kernelName:Vn,backendName:"webgpu",kernelFunc:zoe};function Woe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return Ys(n,s,a,"prod",t)}var VB={kernelName:Ao,backendName:"webgpu",kernelFunc:Woe};var Uoe=r=>{let{backend:e,attrs:t}=r,{start:o,stop:n,step:s,dtype:a}=t,i=pL(o,n,s,a);return e.makeTensorInfo([i.length],a,i)},zB={kernelName:ws,backendName:"webgpu",kernelFunc:Uoe};var GS=it({opType:ye.DIV}),WB={kernelName:Cn,backendName:"webgpu",kernelFunc:GS};var Goe=Ge({opType:pe.RECIPROCAL}),UB={kernelName:ma,backendName:"webgpu",kernelFunc:Goe};var Hoe=Ge({opType:pe.RELU}),GB={kernelName:zn,backendName:"webgpu",kernelFunc:Hoe};var qoe=Ge({opType:pe.RELU6}),HB={kernelName:Gn,backendName:"webgpu",kernelFunc:qoe};var Ix=class{constructor(e,t,o){this.variableNames=["x"],this.uniforms="adjustHeightWidth : vec2<f32>, halfPixelCenters : f32,",this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=[e[0],t,o,e[3]],this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey="resizeBilinear"}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let b = coords[0];
let d = coords[3];
let rc = coords.yz;
let effectiveInSize = vec2<f32>(
f32(uniforms.xShape.y) - uniforms.adjustHeightWidth[0],
f32(uniforms.xShape.z) - uniforms.adjustHeightWidth[1]);
let effectiveOutSize = vec2<f32>(
f32(uniforms.outShape.y) - uniforms.adjustHeightWidth[0],
f32(uniforms.outShape.z) - uniforms.adjustHeightWidth[1]);
let effectiveInputOverOutputRatioRC =
effectiveInSize / effectiveOutSize;
// Fractional source index
let sourceFracIndexRC =
(vec2<f32>(rc) + vec2<f32>(uniforms.halfPixelCenters)) *
effectiveInputOverOutputRatioRC - vec2<f32>(uniforms.halfPixelCenters);
// Compute the four integer indices.
let sourceFloorRC = vec2<i32>(sourceFracIndexRC);
let sourceCeilRC = vec2<i32>(
min(vec2<f32>(uniforms.xShape.yz) - vec2<f32>(1.0), ceil(sourceFracIndexRC)));
let topLeft = getX(b, sourceFloorRC.x, sourceFloorRC.y, d);
let bottomLeft = getX(b, sourceCeilRC.x, sourceFloorRC.y, d);
let topRight = getX(b, sourceFloorRC.x, sourceCeilRC.y, d);
let bottomRight = getX(b, sourceCeilRC.x, sourceCeilRC.y, d);
let fracRC = sourceFracIndexRC - vec2<f32>(sourceFloorRC);
let top = topLeft + (topRight - topLeft) * fracRC.y;
let bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y;
let newValue = top + (bottom - top) * fracRC.x;
setOutputAtIndex(index, newValue);
}
}
`}};function Koe(r){let{inputs:e,backend:t,attrs:o}=r,{images:n}=e,{alignCorners:s,size:a,halfPixelCenters:i}=o,[p,u]=a,c=s&&p>1?1:0,l=s&&u>1?1:0,f=[{type:"float32",data:[c,l]},{type:"float32",data:[i?.5:0]}],d=new Ix(n.shape,p,u);return t.runWebGPUProgram(d,[n],"float32",f)}var qB={kernelName:Un,backendName:"webgpu",kernelFunc:Koe};var wx=class{constructor(e,t,o,n){this.variableNames=["x"],this.uniforms="adjustHeightWidth : vec2<f32>, roundBase : f32,",this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=[e[0],t,o,e[3]],this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.halfPixelCenters=n,this.shaderKey=`resizeNearest_${n}`}getUserCode(){let e;return this.halfPixelCenters?e="max((vec2<f32>(rc) + vec2<f32>(0.5)) * effectiveInputOverOutputRatioRC, vec2<f32>(0.0))":e="vec2<f32>(rc) * effectiveInputOverOutputRatioRC",`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let b = coords[0];
let d = coords[3];
let rc = coords.yz;
let effectiveInSize = vec2<f32>(
f32(uniforms.xShape.y) - uniforms.adjustHeightWidth[0],
f32(uniforms.xShape.z) - uniforms.adjustHeightWidth[1]);
let effectiveOutSize = vec2<f32>(
f32(uniforms.outShape.y) - uniforms.adjustHeightWidth[0],
f32(uniforms.outShape.z) - uniforms.adjustHeightWidth[1]);
let effectiveInputOverOutputRatioRC =
effectiveInSize / effectiveOutSize;
// Fractional source index
let sourceFracIndexRC = ${e};
// Compute the coordinators of nearest neighbor point.
let inputShapeRC = vec2<f32>(f32(uniforms.xShape.y), f32(uniforms.xShape.z));
let sourceNearestRC = vec2<i32>(
min(inputShapeRC - 1.0, floor(sourceFracIndexRC + uniforms.roundBase)));
let newValue = getX(b, sourceNearestRC.x, sourceNearestRC.y, d);
setOutputAtIndex(index, newValue);
}
}
`}};function joe(r){let{inputs:e,backend:t,attrs:o}=r,{images:n}=e,{alignCorners:s,halfPixelCenters:a,size:i}=o,[p,u]=i,c=s&&p>1?1:0,l=s&&u>1?1:0,f=[{type:"float32",data:[c,l]},{type:"float32",data:[s?.5:0]}],d=new wx(n.shape,p,u,a);return t.runWebGPUProgram(d,[n],n.dtype,f)}var KB={kernelName:Wn,backendName:"webgpu",kernelFunc:joe};var Sx=class{constructor(e,t){this.outputShape=[],this.variableNames=["x"],this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.uniforms=`centerX : f32, centerY : f32, sinRadians : f32,
cosRadians : f32,`,this.shaderKey="rotate",this.outputShape=e,typeof t=="number"?(this.uniforms+=" fillValue : f32,",this.fillSnippet="var outputValue = uniforms.fillValue;",this.shaderKey+="_float"):(this.uniforms+=" fillValue : vec3<f32>,",this.fillSnippet="var outputValue = uniforms.fillValue[coords[3]];",this.shaderKey+="_vec3")}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let coordXFloat = (f32(coords[2]) - uniforms.centerX) *
uniforms.cosRadians - (f32(coords[1]) - uniforms.centerY) *
uniforms.sinRadians;
let coordYFloat = (f32(coords[2]) - uniforms.centerX) *
uniforms.sinRadians + (f32(coords[1]) - uniforms.centerY) *
uniforms.cosRadians;
let coordX = i32(round(coordXFloat + uniforms.centerX));
let coordY = i32(round(coordYFloat + uniforms.centerY));
${this.fillSnippet}
if(coordX >= 0 && coordX < uniforms.xShape[2] && coordY >= 0 &&
coordY < uniforms.xShape[1]) {
outputValue = getX(coords[0], coordY, coordX, coords[3]);
}
setOutputAtIndex(index, outputValue);
}
}
`}};var jB={kernelName:es,backendName:"webgpu",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{image:o}=r,{radians:n,fillValue:s,center:a}=e,i=t,p=new Sx(o.shape,s),[u,c]=I.getImageCenter(a,o.shape[1],o.shape[2]),l=[{type:"float32",data:[u]},{type:"float32",data:[c]},{type:"float32",data:[Math.sin(n)]},{type:"float32",data:[Math.cos(n)]}];return typeof s=="number"?l.push({type:"float32",data:[Number.parseFloat(s.toFixed(2))]}):l.push({type:"float32",data:s}),i.runWebGPUProgram(p,[o],o.dtype,l)}};var Xoe=Ge({opType:pe.RSQRT,cpuKernelImpl:cL}),XB={kernelName:xo,backendName:"webgpu",kernelFunc:Xoe};var Fi=class{constructor(e,t,o,n,s,a,i,p=!0){this.variableNames=["updates","indices"],this.workGroupSize=[64,1,1],this.atomic=!0,this.outputShape=a,this.type=i,this.sumDupeIndices=p,this.dispatchLayout=fe(e),this.dispatch=ae(this.dispatchLayout,e,this.workGroupSize),this.sliceDimGreaterThanOne=t>1,this.shaderKey=`scatter_${o}_${n}_${this.sliceDimGreaterThanOne}_${i}_${p}`;let u=At(s.length);this.uniforms=`sliceDim : i32, strides: ${u}, size: i32,`,this.updatesRank=n,this.indicesRank=o}getUserCode(){let e="";this.indicesRank===1?e="coords[0]":this.indicesRank===2&&(e="coords[0], j");let t=`getIndices(${e})`,o=this.sliceDimGreaterThanOne?"uniforms.strides[j]":"uniforms.strides",n="",s="";this.dispatchLayout.x.length===1?(n="flattenedIndex",s=`
fn getUpdatesCoordsFromFlatIndex(index : i32) -> i32 {
return index;
}
`):this.dispatchLayout.x.length===2&&(n="vec2<i32>(flattenedIndex, coords[1])",s=`
fn getUpdatesCoordsFromFlatIndex(index : i32) -> vec2<i32> {
// N.B. |updates| could be a scalar tensor, conceptually representing a
// 2D tensor with all values equal to that. By design, its size must be
// the same as |outShape[1]| in one dimension, and |indicesShape[0]|
// gives the other.
let sliceSize = uniforms.outShape[1];
let d0 = index / sliceSize;
let d1 = index - d0 * sliceSize;
return vec2<i32>(d0, d1);
}
`);let i=`getUpdates(${Array.from({length:this.updatesRank},(c,l)=>`coords[${l}]`).join(", ")})`,p=(c,l)=>{let m=`atomicAdd(${c}, bitcast<i32>(${l}))`;this.type==="float32"&&(m=`
{
var oldBits = 0;
var newBits = bitcast<i32>(${l});
loop {
let info = atomicCompareExchangeWeak(${c}, oldBits, newBits);
if (info.exchanged) {
break;
}
oldBits = info.old_value;
let oldValue = bitcast<f32>(oldBits);
let newValue = oldValue + (${l});
newBits = bitcast<i32>(newValue);
}
}
`);let f=`atomicStore(${c}, bitcast<i32>(${l}));`;return this.sumDupeIndices?m:f};return`
${s}
${ue("index")} {
if (index < uniforms.size) {
let coords = getUpdatesCoordsFromFlatIndex(index);
var flattenedIndex = 0;
for (var j = 0; j < uniforms.sliceDim; j = j + 1) {
let indexInside = i32(round(${t}));
flattenedIndex = flattenedIndex + indexInside * ${o};
}
let updateValue =
${Tc(this.type,!1)}(${i});
let flatIndex = getOutputIndexFromCoords(${n});
${p("&result[flatIndex]","updateValue")};
}
}`}};function Yoe(r){let{inputs:e,backend:t,attrs:o}=r,{indices:n,updates:s}=e,{shape:a}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=I.calculateShapes(s,n,a),m=[l/u,u];if(l===0)return t.makeTensorInfo(a,n.dtype);let f=xe({inputs:{x:n},backend:t,attrs:{shape:[p,i]}}),d=xe({inputs:{x:s},backend:t,attrs:{shape:[p,u]}}),h=d.dtype,g=$o({backend:t,attrs:{shape:m,value:0,dtype:h}}),y=x.sizeFromShape(d.shape),b=[{type:"int32",data:[i]},{type:"int32",data:c},{type:"int32",data:[y]}],C=new Fi(d.shape,i,f.shape.length,d.shape.length,c,m,h),w=t.runWebGPUProgram(C,[d,f],h,b,g),k=xe({inputs:{x:w},backend:t,attrs:{shape:a}});return t.disposeData(f.dataId),t.disposeData(d.dataId),t.disposeData(w.dataId),k}var YB={kernelName:Hn,backendName:"webgpu",kernelFunc:Yoe};var vx=class{constructor(e,t,o){this.variableNames=["c","a","b"],this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.cRank=e,this.rank=o,this.shaderKey="select"}getUserCode(){let e,t;if(this.rank>4)throw Error(`Where for rank ${this.rank} is not yet supported`);if(this.rank===1)t="resRC",e="resRC";else{let n=["resRC.x","resRC.y","resRC.z","resRC.w"],s=[],a=[];for(let i=0;i<this.outputShape.length;i++)a.push(`${n[i]}`),i<this.cRank&&s.push(`${n[i]}`);e=s.join(),t=a.join()}return`
${ue("index")} {
if (index < uniforms.size) {
let resRC = getCoordsFromIndex(index);
let cVal = getC(${e});
if (cVal >= 1.0) {
setOutputAtIndex(index, getA(${t}));
} else {
setOutputAtIndex(index, getB(${t}));
}
}
}
`}};function Qoe(r){let{inputs:e,backend:t}=r,{condition:o,t:n,e:s}=e,a=new vx(o.shape.length,n.shape,n.shape.length);return t.runWebGPUProgram(a,[o,n,s],ct(n.dtype,s.dtype))}var QB={kernelName:vs,backendName:"webgpu",kernelFunc:Qoe};var Zoe=Ge({opType:pe.SIGMOID}),ZB={kernelName:yo,backendName:"webgpu",kernelFunc:Zoe};var Joe=Ge({opType:pe.SIN}),JB={kernelName:Kn,backendName:"webgpu",kernelFunc:Joe};var ene=Ge({opType:pe.SINH}),eV={kernelName:ha,backendName:"webgpu",kernelFunc:ene};var HS=it({opType:ye.SUB,cpuKernelImpl:gL,supportsComplex:!0}),tV={kernelName:Io,backendName:"webgpu",kernelFunc:HS};function tne(r){let{inputs:e,backend:t,attrs:o}=r,{logits:n}=e,{dim:s}=o,a=x.parseAxisParam([s],n.shape),i=em({inputs:{x:n},backend:t,attrs:{reductionIndices:a,keepDims:!1}}),p=I.expandShapeToKeepDim(i.shape,a),u=xe({inputs:{x:i},backend:t,attrs:{shape:p}}),c=HS({inputs:{a:n,b:u},backend:t}),l=VS({inputs:{x:c},backend:t}),m=rm({inputs:{x:l},backend:t,attrs:{axis:a,keepDims:!1}}),f=xe({inputs:{x:m},backend:t,attrs:{shape:p}}),d=GS({inputs:{a:l,b:f},backend:t});return t.disposeData(i.dataId),t.disposeData(u.dataId),t.disposeData(c.dataId),t.disposeData(l.dataId),t.disposeData(m.dataId),t.disposeData(f.dataId),d}var rV={kernelName:Xn,backendName:"webgpu",kernelFunc:tne};var rne=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockShape:s,paddings:a}=o;x.assert(n.shape.length<=4,()=>"spaceToBatchND for rank > 4 with a WebGPU backend not implemented yet");let i=s.reduce((y,b)=>y*b),p=[[0,0]];p.push(...a);for(let y=1+s.length;y<n.shape.length;++y)p.push([0,0]);let u=[],c=US({inputs:{x:n},backend:t,attrs:{paddings:p,constantValue:0}}),l=I.getReshaped(c.shape,s,i,!1),m=I.getPermuted(l.length,s.length,!1),f=I.getReshapedPermuted(c.shape,s,i,!1),d=xe({inputs:{x:c},backend:t,attrs:{shape:l}}),h=Nr({inputs:{x:d},backend:t,attrs:{perm:m}}),g=xe({inputs:{x:h},backend:t,attrs:{shape:f}});return 
u.push(c),u.push(d),u.push(h),u.forEach(y=>t.disposeData(y.dataId)),g},oV={kernelName:ks,backendName:"webgpu",kernelFunc:rne};var kx=class{constructor(e,t){this.variableNames=["A"],this.workGroupSize=[64,1,1],this.size=!0;let o=new Array(e.length);for(let n=0;n<o.length;n++)o[n]=e[n]*t[n];this.outputShape=o,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.rank=this.outputShape.length,this.shaderKey="tile"}getUserCode(){let e=one(this.rank,"uniforms.");return`
${ue("index")} {
if (index < uniforms.size) {
let resRC = getCoordsFromIndex(index);
setOutputAtIndex(index, getA(${e}));
}
}
`}};function one(r,e=""){if(r>=5)throw Error(`Tile for rank ${r} is not yet supported`);if(r===1)return`(resRC % ${e}aShape)`;let t=["resRC.x","resRC.y","resRC.z","resRC.w"],o=[];for(let n=0;n<r;n++)o.push(`(${t[n]} % ${e}aShape[${n}])`);return o.join()}function qS(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{reps:s}=o;if(t.shouldExecuteOnCPU([n])||n.dtype==="string"||n.shape.length>=5){let p=t.readSync(n.dataId),u=n.dtype==="string"?p.map(m=>x.decodeString(m)):p,c=ne(n.shape,n.dtype,u),l=xL(c,s);return t.makeTensorInfo(l.shape,l.dtype,l.values)}let a=new kx(n.shape,s);return t.runWebGPUProgram(a,[n],n.dtype)}var nV={kernelName:wo,backendName:"webgpu",kernelFunc:qS};function nne(r){let{inputs:e,backend:t,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=e,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=I.calculateShapes(s,n,i),f=!1;if(s.dtype==="string"){let R=t.bufferSync(n),A=t.bufferSync(s),D=x.decodeString(t.readSync(a.dataId)[0]),O=lL(R,A,i,m,c,u,p,l,D,f);return t.makeTensorInfo(i,O.dtype,O.values)}let d=[m/c,c],h=xe({inputs:{x:n},backend:t,attrs:{shape:[u,p]}}),g=s.shape.length?xe({inputs:{x:s},backend:t,attrs:{shape:[u,c]}}):Lt({inputs:{x:s},backend:t}),y=g.dtype,b=t.makeTensorInfo([],y,x.makeZerosTypedArray(1,y)),C=xe({inputs:{x:a},backend:t,attrs:{shape:Array(d.length).fill(1)}}),w=qS({inputs:{x:C},backend:t,attrs:{reps:d}}),k=x.sizeFromShape([u,c]),_=[{type:"int32",data:[p]},{type:"int32",data:l},{type:"int32",data:[k]}];switch(u){case 0:break;case 1:{let R=new Fi([u,c],p,h.shape.length,g.shape.length,l,d,y,f);t.runWebGPUProgram(R,[g,h],y,_,w)}break;default:{let R=new Fi([u,c],p,h.shape.length,b.shape.length,l,d,y,f);t.runWebGPUProgram(R,[b,h],y,_,w)}{let R=new Fi([u,c],p,h.shape.length,g.shape.length,l,d,y);t.runWebGPUProgram(R,[g,h],y,_,w)}}let E=xe({inputs:{x:w},backend:t,attrs:{shape:i}});return 
t.disposeData(h.dataId),t.disposeData(g.dataId),t.disposeData(C.dataId),t.disposeData(b.dataId),t.disposeData(w.dataId),E}var sV={kernelName:ei,backendName:"webgpu",kernelFunc:nne};function sne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=o,i=x.parseAxisParam(a,n.shape)[0],p=I.prepareSplitSize(n,s,i),u=n.shape.length,c=new Array(u).fill(0),l=n.shape.slice();return p.map(m=>{let f=[...l];f[i]=m;let d=ms({inputs:{x:n},backend:t,attrs:{begin:c,size:f}});return c[i]+=m,d})}var aV={kernelName:Ts,backendName:"webgpu",kernelFunc:sne};var ane=Ge({opType:pe.SQRT}),iV={kernelName:bo,backendName:"webgpu",kernelFunc:ane};var uV={kernelName:ti,backendName:"webgpu",kernelFunc:({inputs:r,backend:e})=>{let{x:t}=r,o=e,n=new Zo(t.shape,pe.SQUARE);return o.runWebGPUProgram(n,[t],t.dtype)}};var ine=it({opType:ye.SQUARED_DIFFERENCE}),pV={kernelName:Co,backendName:"webgpu",kernelFunc:ine};var Tx=class{constructor(e){this.variableNames=["x"],this.workPerThread=1,this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize,[this.workPerThread,1,1]);let t=At(this.outputShape.length);this.uniforms=`begin : ${t}, strides : ${t}, `,this.shaderKey="stridedSlice"}getUserCode(){let e=this.outputShape.length,t="";if(e===1)t="coords * uniforms.strides + uniforms.begin";else{let n=0;t=this.outputShape.map((s,a)=>(n++,this.outputShape.length===1?`coords * uniforms.strides[${a}] + uniforms.begin[${a}]`:`coords[${n-1}] * uniforms.strides[${a}] + uniforms.begin[${a}]`)).join(",")}return`
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
setOutputAtIndex(index, getX(${t}));
}
}
`}};function une(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:f,finalShape:d,isIdentity:h,sliceDim0:g,isSimpleSlice:y,begin:b,end:C,strides:w}=et.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=xe({inputs:{x:n},backend:t,attrs:{shape:d}});else if(g||y){x.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let _=et.computeOutShape(b,C,w),E=ms({inputs:{x:n},backend:t,attrs:{begin:b,size:_}});k=xe({inputs:{x:E},backend:t,attrs:{shape:d}}),t.disposeData(E.dataId)}else if(t.shouldExecuteOnCPU([n])){let E=t.readSync(n.dataId),R=ne(n.shape,n.dtype,E),A=dL(f,R,w,b);k=t.makeTensorInfo(d,n.dtype,A.values)}else{let E=new Tx(f),R=[{type:"int32",data:b},{type:"int32",data:w}],A=t.runWebGPUProgram(E,[n],n.dtype,R);k=xe({inputs:{x:A},backend:t,attrs:{shape:d}}),t.disposeData(A.dataId)}return k}var cV={kernelName:Yn,backendName:"webgpu",kernelFunc:une};function pne(r){let{inputs:e,backend:t,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=e,m=t.readSync(c.dataId),f=t.readSync(l.dataId),[d,h]=hL(m,f,n,s,a,i,p,u);return[t.makeTensorInfo([d.length],"string",d),t.makeTensorInfo(l.shape,"int32",h)]}var lV={kernelName:Ns,backendName:"webgpu",kernelFunc:pne};var cne=Ge({opType:pe.TANH}),mV={kernelName:Qn,backendName:"webgpu",kernelFunc:cne};var Nx=class{constructor(e){this.variableNames=["x","indices"],this.workGroupSize=[256,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.uniforms=`inputSize : i32, firstPass : i32, negativeInf : f32,
dir : i32, inc : i32,`,this.shaderKey="swap"}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
let outC = getCoordsFromIndex(index);
let batch = outC[0];
let elemIdx = outC[1];
// We compare elements pair-wise within a group of size 2 * inc.
// The comparing rule for each group alternates between ascending
// and descending. Within each group, we compare each pair at
// positions i and i+inc. To decide whether an element at position i
// is x0 or x1, we mod it by 2 * inc, if the result is smaller than
// inc, it is in the first half of the group, we denote it as x0,
// otherwise we denote it as x1.
// For example, as shown in the Bitonic top K paper referenced
// above, Figure5(a) shows that element[1] is in the second half of
// the group when group size is 2, but it is in the first half of
// the group when group size is 4.
let isFirstInPair = elemIdx % (2 * uniforms.inc) < uniforms.inc;
var i = 0;
if (isFirstInPair) {
i = elemIdx;
} else {
i = elemIdx - uniforms.inc;
}
var i0 = 0;
if (uniforms.firstPass == 1) {
i0 = i;
} else {
i0 = i32(getIndices(batch, i));
}
var i1 = 0;
if (uniforms.firstPass == 1) {
i1 = i + uniforms.inc;
} else {
i1 = i32(getIndices(batch, i + uniforms.inc));
}
var x0 = f32(0.0);
var x1 = f32(0.0);
if (i0 < uniforms.inputSize) {
x0 = getX(batch, i0);
} else {
x0 = uniforms.negativeInf;
}
if (i1 < uniforms.inputSize) {
x1 = getX(batch, i1);
} else {
x1 = uniforms.negativeInf;
}
let reverse = elemIdx % (2 * uniforms.dir) >= uniforms.dir;
let isGreater = x0 > x1 || (x0 == x1 && i1 > i0);
if (reverse == isGreater) {
// Elements in opposite order of direction
let iTemp = i0;
i0 = i1;
i1 = iTemp;
}
if (isFirstInPair) {
setOutputAtIndex(index, f32(i0));
} else {
setOutputAtIndex(index, f32(i1));
}
}
}
`}},_x=class{constructor(e){this.variableNames=["x","indices"],this.workGroupSize=[256,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.uniforms="inputSize : i32, firstPass : i32, k : i32,",this.shaderKey="merge"}getUserCode(){return`
${ue("index")} {
if (index < uniforms.size) {
let outC = getCoordsFromIndex(index);
let batch = outC[0];
let elemIdx = outC[1];
// The output size is half of the previous size.
// If the previous sequence is | | | | _ _ _ _ | | | | _ _ _ _
// (k=4), we only need to output the indices at positions |, the
// indices at positions _ can be thrown away, see Figure5(b) After
// Phase 2 (Merge phase) in the Bitonic Top K paper referenced
// above.
// For example, the paper shows we only need to output the orange
// bars. The output sequence should look like this | | | | | | | |.
// Because the sequence is halved, to map the output index back to
// the previous sequence to find the corresponding value, we need
// to double the index. When we double the index, we basically
// interpolate a position, so 2i looks like
// | _ | _ | _ | _ | _ | _ | _. We move the | to the first k
// position of each 2k positions by - elemIdx % k. E.g. for output
// at index 4,5,6,7, we want to get the corresponding element at
// original index 8,9,10,11, for output at index 8,9,10,11,
// we want to get the corresponding element at original index
// 16,17,18,19, so on and so forth.
var i = 0;
if (elemIdx < uniforms.k) {
i = elemIdx;
} else {
i = elemIdx * 2 - elemIdx % uniforms.k;
}
var i0 = 0;
if (uniforms.firstPass == 1) {
i0 = i;
} else {
i0 = i32(getIndices(batch, i));
}
var i1 = 0;
if (uniforms.firstPass == 1) {
i1 = i + uniforms.k;
} else {
i1 = i32(getIndices(batch, i + uniforms.k));
}
let x0 = getX(batch, i0);
var x1 = f32(0.0);
if (i1 < uniforms.inputSize) {
x1 = getX(batch, i1);
} else {
x1 = x0;
}
if (x0 >= x1) {
setOutputAtIndex(index, f32(i0));
} else {
setOutputAtIndex(index, f32(i1));
}
}
}
`}};function Mc(r,e){e!==null&&r.disposeData(e.dataId)}function fV(r){let e=1;for(;e<r;)e*=2;return e}function lne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{k:s,sorted:a}=o,i=n.shape,p=i[i.length-1];if(t.shouldExecuteOnCPU([n])){let k=t.readSync(n.dataId),[_,E]=yL(k,i,n.dtype,s,a);return[t.makeTensorInfo(_.shape,_.dtype,_.values),t.makeTensorInfo(E.shape,E.dtype,E.values)]}if(s===0)return i[i.length-1]=0,[t.makeTensorInfo(i,n.dtype,[]),t.makeTensorInfo(i,"int32",[])];if(p===1)return[n,$o({attrs:{shape:i,dtype:"int32",value:0},backend:t})];let c=x.sizeFromShape(i)/p,l=xe({inputs:{x:n},attrs:{shape:[c,p]},backend:t}),m=fV(s),f=fV(p),d=null,h=()=>d===null?[l,l]:[l,d],g=(k,_,E)=>{let R=h(),A=new Nx(E),O=[{type:"int32",data:[p]},{type:"int32",data:[d===null?1:0]},{type:"float32",data:[Number.NEGATIVE_INFINITY]},{type:"int32",data:[k]},{type:"int32",data:[_]}],M=d;d=t.runWebGPUProgram(A,R,"int32",O),Mc(t,M)};for(let k=1;k<m;k*=2){let _=k*2;for(let E=k;E>=1;E/=2)g(_,E,[c,f])}for(let k=f;k>m;k/=2){let _=h(),E=new _x([c,k/2]),A=[{type:"int32",data:[p]},{type:"int32",data:[d===null?1:0]},{type:"int32",data:[m]}],D=d;d=t.runWebGPUProgram(E,_,"int32",A),Mc(t,D);let O=m/2,M=O*2;for(let L=O;L>=1;L/=2)g(M,L,d.shape)}let y=d;d=ms({inputs:{x:d},backend:t,attrs:{begin:0,size:[c,s]}}),Mc(t,y);let b=WS({inputs:{x:l,indices:d},backend:t,attrs:{axis:1,batchDims:1}});Mc(t,l);let C=i.slice(0,-1);C.push(s),y=d,d=xe({inputs:{x:d},attrs:{shape:C},backend:t}),Mc(t,y);let w=b;return b=xe({inputs:{x:b},attrs:{shape:C},backend:t}),Mc(t,w),[b,d]}var dV={kernelName:Zn,backendName:"webgpu",kernelFunc:lne};var Ex=class{constructor(e){this.variableNames=["Image","Transforms"],this.uniforms="interpolationModeId : i32, fillModeId : i32, fillValue : f32,",this.workGroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=fe(this.outputShape),this.dispatch=ae(this.dispatchLayout,this.outputShape,this.workGroupSize),this.shaderKey="transform"}getUserCode(){return`
fn mapCoord(outCoord : f32, len : f32) -> f32{
var inCoord = outCoord;
if(uniforms.fillModeId == 2) {
if (inCoord < 0.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz2 = 2.0 * len;
if (inCoord < sz2) {
inCoord = sz2 * f32(i32(f32(-inCoord / sz2))) +
inCoord;
}
if (inCoord < -len) {
inCoord = inCoord + sz2;
} else {
inCoord = -inCoord - 1.0;
}
}
} else if (inCoord > len - 1.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz2 = 2.0 * len;
inCoord = inCoord - sz2 * f32(i32(f32(inCoord / sz2)));
if (inCoord >= len) {
inCoord = sz2 - inCoord - 1.0;
}
}
}
return clamp(inCoord, 0.0, len - 1.0);
} else if (uniforms.fillModeId == 3) {
if (inCoord < 0.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz = len - 1.0;
inCoord = inCoord + len * (f32(i32(f32(-inCoord / sz))) + 1.0);
}
} else if (inCoord > len - 1.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz = len - 1.0;
inCoord = inCoord - len * f32(i32(f32(inCoord / sz)));
}
}
return clamp(inCoord, 0.0, len - 1.0);
} else if (uniforms.fillModeId == 4) {
return clamp(outCoord, 0.0, len - 1.0);
}
return outCoord;
}
fn readWithFillValue(batch : i32, coordY : i32, coordX : i32,
channel : i32) -> f32 {
var outputValue : f32;
if (0 <= coordY && coordY < uniforms.imageShape[1] && 0 <= coordX && coordX < uniforms.imageShape[2]) {
outputValue = getImage(batch, coordY, coordX, channel);
} else {
outputValue = uniforms.fillValue;
}
return outputValue;
}
${ue("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
var outputValue : f32;
let batch = coords[0];
let x = coords[2];
let y = coords[1];
let channel = coords[3];
let xf = f32(x);
let yf = f32(y);
let a1 = getTransforms(batch, 0);
let a2 = getTransforms(batch, 1);
let a3 = getTransforms(batch, 2);
let b1 = getTransforms(batch, 3);
let b2 = getTransforms(batch, 4);
let b3 = getTransforms(batch, 5);
let c1 = getTransforms(batch, 6);
let c2 = getTransforms(batch, 7);
let projection = c1 * xf + c2 * yf + 1.0;
if (projection == 0.0) {
outputValue = uniforms.fillValue;
} else {
let inX = (a1 * xf + a2 * yf + a3) / projection;
let inY = (b1 * xf + b2 * yf + b3) / projection;
let mapX = mapCoord(inX, f32(uniforms.imageShape[2]));
let mapY = mapCoord(inY, f32(uniforms.imageShape[1]));
if (uniforms.interpolationModeId == 1) {
let coordY = i32(round(mapY));
let coordX = i32(round(mapX));
outputValue = readWithFillValue(batch, coordY, coordX,
channel);
} else {
let yFloor = floor(mapY);
let xFloor = floor(mapX);
let yCeil = yFloor + 1.0;
let xCeil = xFloor + 1.0;
let valueYFloor = (xCeil - mapX) *
readWithFillValue(batch, i32(yFloor), i32(xFloor), channel) +
(mapX - xFloor) *
readWithFillValue(batch, i32(yFloor), i32(xCeil), channel);
let valueYCeil = (xCeil - mapX) *
readWithFillValue(batch, i32(yCeil), i32(xFloor), channel) +
(mapX - xFloor) *
readWithFillValue(batch, i32(yCeil), i32(xCeil), channel);
outputValue = (yCeil - mapY) * valueYFloor +
(mapY - yFloor) * valueYCeil;
}
}
setOutputAtIndex(index, outputValue);
}
}
`}};function mne(r){let{inputs:e,backend:t,attrs:o}=r,{image:n,transforms:s}=e,{interpolation:a,fillMode:i,fillValue:p,outputShape:u}=o,[c,l,m,f]=n.shape,[d,h]=u!=null?u:[l,m],g=[c,d,h,f],y=new Ex(g),b=a==="nearest"?1:2,C;switch(i){case"constant":C=1;break;case"reflect":C=2;break;case"wrap":C=3;break;case"nearest":C=4;break;default:C=1;break}let w=[{type:"int32",data:[b]},{type:"int32",data:[C]},{type:"float32",data:[p]}];return t.runWebGPUProgram(y,[n,s],"float32",w)}var hV={kernelName:Jn,backendName:"webgpu",kernelFunc:mne};function fne(r){let{inputs:e,backend:t,attrs:o}=r,{value:n}=e,{axis:s}=o;s<0&&(s+=n.shape.length);let a=n,i=a.shape.length,p=n.shape[s],u=new Array(i-1),c=0;for(let h=0;h<i;h++)h!==s&&(u[c++]=a.shape[h]);let l=[],m=new Array(i).fill(0),f=a.shape.slice();f[s]=1;let d=new Array(p);for(let h=0;h<d.length;h++){m[s]=h;let g=ms({inputs:{x:a},backend:t,attrs:{begin:m,size:f}}),y=xe({inputs:{x:g},backend:t,attrs:{shape:u}});d[h]=y,l.push(g)}return l.forEach(h=>t.disposeData(h.dataId)),d}var gV={kernelName:_s,backendName:"webgpu",kernelFunc:fne};var dne=[aM,CL,IL,wL,vL,kL,TL,EL,$L,AL,OL,ML,LL,uM,VL,WL,UL,GL,HL,qL,XL,YL,QL,ZL,tB,rB,oB,nB,sB,aB,nM,iB,cB,uB,pB,lB,mB,fB,dB,hB,gB,xB,iM,BL,yB,bB,CB,IB,wB,SB,vB,NL,kB,TB,_L,NB,_B,EB,JL,$B,RB,AB,FL,PB,OB,MB,LB,BB,VB,zB,DL,WB,UB,GB,HB,sM,qB,KB,jB,XB,YB,QB,ZB,JB,eV,RL,cV,lV,rV,oV,sV,aV,iV,uV,pV,tV,eB,mV,nV,dV,hV,SL,gV,FB];for(let r of dne)ya(r);var xV="4.0.0",hne="4.0.0",gne="4.0.0",xne="4.0.0",yne="4.0.0",bne="0.0.1-alpha.14",Cne={tfjs:xV,"tfjs-core":xV,"tfjs-converter":hne,"tfjs-backend-cpu":gne,"tfjs-backend-webgl":xne,"tfjs-backend-wasm":yne,"tfjs-backend-webgpu":bne};export{sn as Abs,Li as Acos,Bi as Acosh,xi as AdadeltaOptimizer,yi as AdagradOptimizer,bi as AdamOptimizer,Ci as AdamaxOptimizer,_r as Add,an as AddN,oa as All,na as Any,un as ArgMax,ja as ArgMin,Vi as Asin,zi as Asinh,Wi as Atan,sa as Atan2,Ui as Atanh,pn as AvgPool,ip as AvgPool3D,Fm as AvgPool3DGrad,Am as AvgPoolGrad,Gl as BackendWasm,cn as 
BatchMatMul,hs as BatchToSpaceND,up as Bincount,pp as BroadcastArgs,Tne as BroadcastTo,to as Cast,ro as Ceil,Ro as ClipByValue,aa as Complex,cp as ComplexAbs,gs as Concat,ln as Conv2D,lp as Conv2DBackpropFilter,mn as Conv2DBackpropInput,mp as Conv3D,Dm as Conv3DBackpropFilterV2,fp as Conv3DBackpropInputV2,fn as Cos,dn as Cosh,xn as CropAndResize,hn as Cumprod,gn as Cumsum,rn as DataStorage,dp as DenseBincount,yn as DepthToSpace,bn as DepthwiseConv2dNative,hp as DepthwiseConv2dNativeBackpropFilter,gp as DepthwiseConv2dNativeBackpropInput,xp as Diag,yp as Dilation2D,vb as Dilation2DBackpropFilter,Sb as Dilation2DBackpropInput,Cb as ENV,Xa as Einsum,In as Elu,Pm as EluGrad,Qc as Environment,oo as Equal,Gi as Erf,no as Exp,xs as ExpandDims,wn as Expm1,bp as FFT,ys as Fill,Sn as FlipLeftRight,so as Floor,vn as FloorDiv,Zi as FromPixels,kn as FusedBatchNorm,Do as FusedConv2D,Po as FusedDepthwiseConv2D,Fu as GPGPUContext,Tn as GatherNd,bs as GatherV2,bl as GraphModel,ao as Greater,io as GreaterEqual,Cp as IFFT,uo as Identity,Ya as Imag,Hi as IsFinite,qi as IsInf,ia as IsNan,Jr as KernelBackend,wp as LRN,Om as LRNGrad,Nn as LeakyRelu,po as Less,co as LessEqual,Ip as LinSpace,lo as Log,Ki as Log1p,Nne as LogSoftmax,_n as LogicalAnd,En as LogicalNot,ua as LogicalOr,g0 as LogicalXor,_ne as LowerBound,Si as MathBackendCPU,Ni as MathBackendWebGL,$n as Max,Rn as MaxPool,Sp as MaxPool3D,Lm as MaxPool3DGrad,Mm as MaxPoolGrad,vp as MaxPoolWithArgmax,mo as Maximum,An as Mean,Fn as Min,fo as Minimum,Dn as MirrorPad,ji as Mod,Ii as MomentumOptimizer,kp as Multinomial,ho as Multiply,Pn as Neg,On as NonMaxSuppressionV3,pa as NonMaxSuppressionV4,Mn as NonMaxSuppressionV5,go as NotEqual,Ub as OP_SCOPE_SUFFIX,ca as OneHot,Cs as OnesLike,wr as Optimizer,ns as OptimizerConstructors,Is as Pack,Ln as PadV2,Ene as Pool,Bn as Pow,Vn as Prelu,Ao as Prod,wi as RMSPropOptimizer,Tp as RaggedGather,Np as RaggedRange,_p as RaggedTensorToTensor,ws as Range,Fb as Rank,la as Real,Cn as RealDiv,ma as 
Reciprocal,Et as Reduction,zn as Relu,Gn as Relu6,Ss as Reshape,Un as ResizeBilinear,Vm as Res