human/dist/tfjs.esm.js

7457 lines
1.1 MiB
JavaScript
Raw Normal View History

2022-11-18 17:13:29 +01:00
/*
Human
homepage: <https://github.com/vladmandic/human>
author: <https://github.com/vladmandic>
*/
2023-01-07 21:50:37 +01:00
var PV=Object.create;var yb=Object.defineProperty;var MV=Object.getOwnPropertyDescriptor;var LV=Object.getOwnPropertyNames;var BV=Object.getPrototypeOf,VV=Object.prototype.hasOwnProperty;var Kt=(r,e)=>()=>(e||r((e={exports:{}}).exports,e),e.exports),Ge=(r,e)=>{for(var t in e)yb(r,t,{get:e[t],enumerable:!0})},zV=(r,e,t,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let n of LV(e))!VV.call(r,n)&&n!==t&&yb(r,n,{get:()=>e[n],enumerable:!(o=MV(e,n))||o.enumerable});return r};var ap=(r,e,t)=>(t=r!=null?PV(BV(r)):{},zV(e||!r||!r.__esModule?yb(t,"default",{value:r,enumerable:!0}):t,r));var i0=Kt((Sse,a0)=>{a0.exports=It;var yo=null;try{yo=new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([0,97,115,109,1,0,0,0,1,13,2,96,0,1,127,96,4,127,127,127,127,1,127,3,7,6,0,1,1,1,1,1,6,6,1,127,1,65,0,11,7,50,6,3,109,117,108,0,1,5,100,105,118,95,115,0,2,5,100,105,118,95,117,0,3,5,114,101,109,95,115,0,4,5,114,101,109,95,117,0,5,8,103,101,116,95,104,105,103,104,0,0,10,191,1,6,4,0,35,0,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,126,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,127,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,128,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,129,34,4,66,32,135,167,36,0,32,4,167,11,36,1,1,126,32,0,173,32,1,173,66,32,134,132,32,2,173,32,3,173,66,32,134,132,130,34,4,66,32,135,167,36,0,32,4,167,11])),{}).exports}catch(r){}function It(r,e,t){this.low=r|0,this.high=e|0,this.unsigned=!!t}It.prototype.__isLong__;Object.defineProperty(It.prototype,"__isLong__",{value:!0});function Mr(r){return(r&&r.__isLong__)===!0}It.isLong=Mr;var QI={},ZI={};function eu(r,e){var t,o,n;return 
e?(r>>>=0,(n=0<=r&&r<256)&&(o=ZI[r],o)?o:(t=vt(r,(r|0)<0?-1:0,!0),n&&(ZI[r]=t),t)):(r|=0,(n=-128<=r&&r<128)&&(o=QI[r],o)?o:(t=vt(r,r<0?-1:0,!1),n&&(QI[r]=t),t))}It.fromInt=eu;function bo(r,e){if(isNaN(r))return e?Ji:Co;if(e){if(r<0)return Ji;if(r>=r0)return s0}else{if(r<=-e0)return Pr;if(r+1>=e0)return n0}return r<0?bo(-r,e).neg():vt(r%Ep|0,r/Ep|0,e)}It.fromNumber=bo;function vt(r,e,t){return new It(r,e,t)}It.fromBits=vt;var Dm=Math.pow;function Db(r,e,t){if(r.length===0)throw Error("empty string");if(r==="NaN"||r==="Infinity"||r==="+Infinity"||r==="-Infinity")return Co;if(typeof e=="number"?(t=e,e=!1):e=!!e,t=t||10,t<2||36<t)throw RangeError("radix");var o;if((o=r.indexOf("-"))>0)throw Error("interior hyphen");if(o===0)return Db(r.substring(1),e,t).neg();for(var n=bo(Dm(t,8)),s=Co,a=0;a<r.length;a+=8){var i=Math.min(8,r.length-a),p=parseInt(r.substring(a,a+i),t);if(i<8){var u=bo(Dm(t,i));s=s.mul(u).add(bo(p))}else s=s.mul(n),s=s.add(bo(p))}return s.unsigned=e,s}It.fromString=Db;function rs(r,e){return typeof r=="number"?bo(r,e):typeof r=="string"?Db(r,e):vt(r.low,r.high,typeof e=="boolean"?e:r.unsigned)}It.fromValue=rs;var JI=1<<16,cz=1<<24,Ep=JI*JI,r0=Ep*Ep,e0=r0/2,t0=eu(cz),Co=eu(0);It.ZERO=Co;var Ji=eu(0,!0);It.UZERO=Ji;var $p=eu(1);It.ONE=$p;var o0=eu(1,!0);It.UONE=o0;var Rb=eu(-1);It.NEG_ONE=Rb;var n0=vt(-1,2147483647,!1);It.MAX_VALUE=n0;var s0=vt(-1,-1,!0);It.MAX_UNSIGNED_VALUE=s0;var Pr=vt(0,-2147483648,!1);It.MIN_VALUE=Pr;var de=It.prototype;de.toInt=function(){return this.unsigned?this.low>>>0:this.low};de.toNumber=function(){return this.unsigned?(this.high>>>0)*Ep+(this.low>>>0):this.high*Ep+(this.low>>>0)};de.toString=function(e){if(e=e||10,e<2||36<e)throw RangeError("radix");if(this.isZero())return"0";if(this.isNegative())if(this.eq(Pr)){var t=bo(e),o=this.div(t),n=o.mul(t).sub(this);return o.toString(e)+n.toInt().toString(e)}else return"-"+this.neg().toString(e);for(var s=bo(Dm(e,6),this.unsigned),a=this,i="";;){var 
p=a.div(s),u=a.sub(p.mul(s)).toInt()>>>0,c=u.toString(e);if(a=p,a.isZero())return c+i;for(;c.length<6;)c="0"+c;i=""+c+i}};de.getHighBits=function(){return this.high};de.getHighBitsUnsigned=function(){return this.high>>>0};de.getLowBits=function(){return this.low};de.getLowBi
2023-01-06 19:23:06 +01:00
`),z=F=>M.writeSync(2,F+`
2023-03-07 00:15:42 +01:00
`));var U=u.print||V,K=u.printErr||z;Object.assign(u,d),d=null,u.arguments&&(f=u.arguments),u.thisProgram&&(h=u.thisProgram),u.quit&&(g=u.quit);var H=4,q=Atomics.load,Z=Atomics.store,ee=Atomics.compareExchange,oe;u.wasmBinary&&(oe=u.wasmBinary);var J=u.noExitRuntime||!0;typeof WebAssembly!="object"&&Ki("no native wasm support detected");var te,ie,ce=!1,ye;function Ne(F,B){F||Ki(B)}var Ie=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):void 0;function Ae(F,B,ue){for(var _e=B+ue,Me=B;F[Me]&&!(Me>=_e);)++Me;if(Me-B>16&&F.buffer&&Ie)return Ie.decode(F.buffer instanceof SharedArrayBuffer?F.slice(B,Me):F.subarray(B,Me));for(var Pe="";B<Me;){var fe=F[B++];if(!(fe&128)){Pe+=String.fromCharCode(fe);continue}var ve=F[B++]&63;if((fe&224)==192){Pe+=String.fromCharCode((fe&31)<<6|ve);continue}var Ft=F[B++]&63;if((fe&240)==224?fe=(fe&15)<<12|ve<<6|Ft:fe=(fe&7)<<18|ve<<12|Ft<<6|F[B++]&63,fe<65536)Pe+=String.fromCharCode(fe);else{var Qr=fe-65536;Pe+=String.fromCharCode(55296|Qr>>10,56320|Qr&1023)}}return Pe}function Re(F,B){return F?Ae(o(),F,B):""}function ot(F,B,ue,_e){if(!(_e>0))return 0;for(var Me=ue,Pe=ue+_e-1,fe=0;fe<F.length;++fe){var ve=F.charCodeAt(fe);if(ve>=55296&&ve<=57343){var Ft=F.charCodeAt(++fe);ve=65536+((ve&1023)<<10)|Ft&1023}if(ve<=127){if(ue>=Pe)break;B[ue++]=ve}else if(ve<=2047){if(ue+1>=Pe)break;B[ue++]=192|ve>>6,B[ue++]=128|ve&63}else if(ve<=65535){if(ue+2>=Pe)break;B[ue++]=224|ve>>12,B[ue++]=128|ve>>6&63,B[ue++]=128|ve&63}else{if(ue+3>=Pe)break;B[ue++]=240|ve>>18,B[ue++]=128|ve>>12&63,B[ue++]=128|ve>>6&63,B[ue++]=128|ve&63}}return B[ue]=0,ue-Me}function pt(F,B,ue){return ot(F,o(),B,ue)}var ze,ct,at,ft,ht,Rr,Pt,qr,er;w&&(ze=u.buffer);function Nt(F){ze=F,u.HEAP8=ct=new Int8Array(F),u.HEAP16=ft=new Int16Array(F),u.HEAP32=Rr=new Int32Array(F),u.HEAPU8=at=new Uint8Array(F),u.HEAPU16=ht=new Uint16Array(F),u.HEAPU32=Pt=new Uint32Array(F),u.HEAPF32=qr=new Float32Array(F),u.HEAPF64=er=new Float64Array(F)}var 
tr=u.INITIAL_MEMORY||16777216;if(w)te=u.wasmMemory,ze=u.buffer;else if(u.wasmMemory)te=u.wasmMemory;else if(te=new WebAssembly.Memory({initial:tr/65536,maximum:32768,shared:!0}),!(te.buffer instanceof SharedArrayBuffer))throw K("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),C&&K("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)"),Error("bad memory");te&&(ze=te.buffer),tr=ze.byteLength,Nt(ze);var rr,jr=[],Xr=[],cr=[],ea=!1;function Do(){return J}function hs(){if(u.preRun)for(typeof u.preRun=="function"&&(u.preRun=[u.preRun]);u.preRun.length;)Dc(u.preRun.shift());Pc(jr)}function qt(){ea=!0,!w&&Pc(Xr)}function ta(){if(!w){if(u.postRun)for(typeof u.postRun=="function"&&(u.postRun=[u.postRun]);u.postRun.length;)AI(u.postRun.shift());Pc(cr)}}function Dc(F){jr.unshift(F)}function Fc(F){Xr.unshift(F)}function AI(F){cr.unshift(F)}var Ya=0,Ju=null,ra=null;function Rx(F){Ya++,u.monitorRunDependencies&&u.monitorRunDependencies(Ya)}function Yl(F){if(Ya--,u.monitorRunDependencies&&u.monitorRunDependencies(Ya),Ya==0&&(Ju!==null&&(clearInterval(Ju),Ju=null),ra)){var B=ra;ra=null,B()}}function Ki(F){u.onAbort&&u.onAbort(F),F="Aborted("+F+")",K(F),ce=!0,ye=1,F+=". 
Build with -sASSERTIONS for more info.";var B=new WebAssembly.RuntimeError(F);throw l(B),B}var Dx="data:application/octet-stream;base64,";function Ql(F){return F.startsWith(Dx)}function ep(F){return F.startsWith("file://")}var lr;lr="tfjs-backend-wasm-threaded-simd.wasm",Ql(lr)||(lr=_(lr));function Zl(F){try{if(F==lr&&oe)return new Uint8Array(oe);if(R)return R(F);throw"both async and sync fetching of the wasm failed"}catch(B){Ki(B)}}function Fx(){if(!oe&&(x||b)){if(typeof fetch=="function"&&!ep(lr))return fetch(lr,{credentials:"same-origin"}).then(function(F){if(!F.ok)throw"failed to load wasm binary file at '"+lr+"'";return F.arrayBuffer()}).catch(function(){return Zl(lr)});if(A)return new Promise(function(F,B){A(lr,function(ue){F(new Uin
");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"})};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}WasmBackendModuleThreadedSimd(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else 
if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};`});var I3=Kt((Ig,uI)=>{var iI=(()=>{var r=typeof document!="undefined"&&document.currentScript?document.currentScript.src:void 0;return typeof __filename!="undefined"&&(r=r||__filename),function(e){e=e||{};var t=typeof e!="undefined"?e:{},o,n;t.ready=new Promise(function(G,se){o=G,n=se});var s;typeof process!="undefined"&&process.listeners&&(s={uncaughtException:process.listeners("uncaughtException"),unhandledRejection:process.listeners("unhandledRejection")});var a=Object.assign({},t),i=[],p="./this.program",u=(G,se)=>{throw se},c=typeof window=="object",l=typeof importScripts=="function",m=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string",d="";function f(G){return t.locateFile?t.locateFile(G,d):d+G}var h,g,x,b;function C(G){if(G instanceof Ju)return;E("exiting due to exception: "+G)}if(m){var w=oI(),k=nI();l?d=k.dirname(d)+"/":d=__dirname+"/",h=(G,se)=>(G=hs(G)?new URL(G):k.normalize(G),w.readFileSync(G,se?void 0:"utf8")),x=G=>{var se=h(G,!0);return se.buffer||(se=new Uint8Array(se)),se},g=(G,se,Te)=>{G=hs(G)?new URL(G):k.normalize(G),w.readFile(G,function(nt,Tt){nt?Te(nt):se(Tt.buffer)})},process.argv.length>1&&(p=process.argv[1].replace(/\\/g,"/")),i=process.argv.slice(2),process.on("uncaughtException",function(G){if(!(G instanceof Ju))throw G}),process.on("unhandledRejection",function(G){throw G}),u=(G,se)=>{if(at())throw process.exitCode=G,se;C(se),process.exit(G)},t.inspect=function(){return"[Emscripten Module object]"}}else(c||l)&&(l?d=self.location.href:typeof 
document!="undefined"&&document.currentScript&&(d=document.currentScript.src),r&&(d=r),d.indexOf("blob:")!==0?d=d.substr(0,d.replace(/[?#].*/,"").lastIndexOf("/")+1):d="",h=G=>{var se=new XMLHttpRequest;return se.open("GET",G,!1),se.send(null),se.responseText},l&&(x=G=>{var se=new XMLHttpRequest;return se.open("GET",G,!1),se.responseTy
2022-11-18 17:13:29 +01:00
`)),p.join(`
2023-01-06 19:23:06 +01:00
`)}function Cz(r,e,t,o){let n=We(e),s=o[o.length-1],a=new Array(s).fill(0),i=e.length,p=t==="complex64"?Yc(r):r;if(i>1)for(let u=0;u<n/s;u++){let c=u*s;for(let l=0;l<s;l++)a[l]=Math.max(a[l],Xc(p[c+l],0,t).length)}return a}function Xc(r,e,t){let o;return Array.isArray(r)?o=`${parseFloat(r[0].toFixed(Mb))} + ${parseFloat(r[1].toFixed(Mb))}j`:Oo(r)?o=`'${r}'`:t==="bool"?o=h0(r):o=parseFloat(r.toFixed(Mb)).toString(),Yi(o,e)}function h0(r){return r===0?"false":"true"}function Pm(r,e,t,o,n,s=!0){let a=t==="complex64"?2:1,i=e[0],p=e.length;if(p===0){if(t==="complex64"){let h=Yc(r);return[Xc(h[0],0,t)]}return t==="bool"?[h0(r[0])]:[r[0].toString()]}if(p===1){if(i>d0){let g=jc*a,x=Array.from(r.slice(0,g)),b=Array.from(r.slice((i-jc)*a,i*a));return t==="complex64"&&(x=Yc(x),b=Yc(b)),["["+x.map((C,w)=>Xc(C,n[w],t)).join(", ")+", ..., "+b.map((C,w)=>Xc(C,n[i-jc+w],t)).join(", ")+"]"]}return["["+(t==="complex64"?Yc(r):Array.from(r)).map((g,x)=>Xc(g,n[x],t)).join(", ")+"]"]}let u=e.slice(1),c=o.slice(1),l=o[0]*a,m=[];if(i>d0){for(let h=0;h<jc;h++){let g=h*l,x=g+l;m.push(...Pm(r.slice(g,x),u,t,c,n,!1))}m.push("...");for(let h=i-jc;h<i;h++){let g=h*l,x=g+l;m.push(...Pm(r.slice(g,x),u,t,c,n,h===i-1))}}else for(let h=0;h<i;h++){let g=h*l,x=g+l;m.push(...Pm(r.slice(g,x),u,t,c,n,h===i-1))}let d=p===2?",":"";m[0]="["+(i>0?m[0]+d:"");for(let h=1;h<m.length-1;h++)m[h]=" "+m[h]+d;let f=`,
2022-11-20 22:20:02 +01:00
`;for(let h=2;h<p;h++)f+=`
2023-01-07 21:50:37 +01:00
`;return m[m.length-1]=" "+m[m.length-1]+"]"+(s?"":f),m}function Yc(r){let e=[];for(let t=0;t<r.length;t+=2)e.push([r[t],r[t+1]]);return e}var st=class{constructor(e,t,o){if(this.dtype=t,this.shape=e.slice(),this.size=We(e),o!=null){let n=o.length;$(n===this.size,()=>`Length of values '${n}' does not match the size inferred by the shape '${this.size}'.`)}if(t==="complex64")throw new Error("complex64 dtype TensorBuffers are not supported. Please create a TensorBuffer for the real and imaginary parts separately and call tf.complex(real, imag).");this.values=o||Sb(t,this.size),this.strides=xs(e)}set(e,...t){t.length===0&&(t=[0]),$(t.length===this.rank,()=>`The number of provided coordinates (${t.length}) must match the rank (${this.rank})`);let o=this.locToIndex(t);this.values[o]=e}get(...e){e.length===0&&(e=[0]);let t=0;for(let n of e){if(n<0||n>=this.shape[t]){let s=`Requested out of range element at ${e}. Buffer shape=${this.shape}`;throw new Error(s)}t++}let o=e[e.length-1];for(let n=0;n<e.length-1;++n)o+=this.strides[n]*e[n];return this.values[o]}locToIndex(e){if(this.rank===0)return 0;if(this.rank===1)return e[0];let t=e[e.length-1];for(let o=0;o<e.length-1;++o)t+=this.strides[o]*e[o];return t}indexToLoc(e){if(this.rank===0)return[];if(this.rank===1)return[e];let t=new Array(this.shape.length);for(let o=0;o<t.length-1;++o)t[o]=Math.floor(e/this.strides[o]),e-=t[o]*this.strides[o];return t[t.length-1]=e,t}get rank(){return this.shape.length}toTensor(){return ns().makeTensor(this.values,this.shape,this.dtype)}},ns=null,Dp=null,Sz=null;function g0(r){ns=r}function x0(r){Dp=r}function y0(r){Sz=r}var it=class{constructor(e,t,o,n){this.kept=!1,this.isDisposedInternal=!1,this.shape=e.slice(),this.dtype=t||"float32",this.size=We(e),this.strides=xs(e),this.dataId=o,this.id=n,this.rankType=this.rank<5?this.rank.toString():"higher"}get rank(){return this.shape.length}async buffer(){let e=await this.data();return Dp.buffer(this.shape,this.dtype,e)}bufferSync(){return 
Dp.buffer(this.shape,this.dtype,this.dataSync())}async array(){let e=await this.data();return Xi(this.shape,e,this.dtype==="complex64")}arraySync(){return Xi(this.shape,this.dataSync(),this.dtype==="complex64")}async data(){this.throwIfDisposed();let e=ns().read(this.dataId);if(this.dtype==="string"){let t=await e;try{return t.map(o=>Rp(o))}catch(o){throw new Error("Failed to decode the string bytes into utf-8. To get the original bytes, call tensor.bytes().")}}return e}dataToGPU(e){return this.throwIfDisposed(),ns().readToGPU(this.dataId,e)}dataSync(){this.throwIfDisposed();let e=ns().readSync(this.dataId);if(this.dtype==="string")try{return e.map(t=>Rp(t))}catch(t){throw new Error("Failed to decode the string bytes into utf-8. To get the original bytes, call tensor.bytes().")}return e}async bytes(){this.throwIfDisposed();let e=await ns().read(this.dataId);return this.dtype==="string"?e:new Uint8Array(e.buffer)}dispose(){this.isDisposed||(ns().disposeTensor(this),this.isDisposedInternal=!0)}get isDisposed(){return this.isDisposedInternal}throwIfDisposed(){if(this.isDisposed)throw new Error("Tensor is disposed.")}print(e=!1){return Dp.print(this,e)}clone(){return this.throwIfDisposed(),Dp.clone(this)}toString(e=!1){let t=this.dataSync();return f0(t,this.shape,this.dtype,e)}cast(e){return this.throwIfDisposed(),Dp.cast(this,e)}variable(e=!0,t,o){return this.throwIfDisposed(),ns().makeVariable(this,e,t,o)}};Object.defineProperty(it,Symbol.hasInstance,{value:r=>!!r&&r.data!=null&&r.dataSync!=null&&r.throwIfDisposed!=null});function wz(){return Gc("Tensor",()=>it)}wz();var Na=class extends it{constructor(e,t,o,n){super(e.shape,e.dtype,e.dataId,n),this.trainable=t,this.name=o}assign(e){if(e.dtype!==this.dtype)throw new Error(`dtype of the new value (${e.dtype}) and previous value (${this.dtype}) must match`);if(!Or(e.shape,this.shape))throw new Error(`shape of the new value (${e.shape}) and previous value (${this.shape}) must 
match`);ns().disposeTensor(this),this.dataId=e.dataId,ns().incRef(this,null)}dispose(){ns().disposeVariable(this),this.isDisposedInternal=!0}};Ob
2023-01-06 19:23:06 +01:00
with dtype ${s.dtype}. `)}),t.length===1)return Lr(t[0]);let o=t,n={axis:e};return T.runKernel(Cs,o,n)}var xt=N({concat_:wW});function IW(r,e,t=!1,o=!1){let n=v(r,"a","matMul"),s=v(e,"b","matMul");[n,s]=De(n,s);let a={a:n,b:s},i={transposeA:t,transposeB:o};return T.runKernel(zo,a,i)}var Xe=N({matMul_:IW});function vW(r){let t={x:v(r,"x","sigmoid","float32")};return T.runKernel(Gn,t)}var Ws=N({sigmoid_:vW});function kW(r,e,t){let o=v(r,"x","slice","string_or_numeric");if(o.rank===0)throw new Error("Slicing scalar is not possible");let n={x:o},s={begin:e,size:t};return T.runKernel(Es,n,s)}var Ke=N({slice_:kW});function NW(r){let t={x:v(r,"x","tanh","float32")};return T.runKernel(Zn,t)}var tl=N({tanh_:NW});function TW(r,e,t,o,n,s){let a=v(r,"forgetBias","basicLSTMCell"),i=v(e,"lstmKernel","basicLSTMCell"),p=v(t,"lstmBias","basicLSTMCell"),u=v(o,"data","basicLSTMCell"),c=v(n,"c","basicLSTMCell"),l=v(s,"h","basicLSTMCell"),m=xt([u,l],1),d=Xe(m,i),f=be(d,p),h=f.shape[0],g=f.shape[1]/4,x=[h,g],b=Ke(f,[0,0],x),C=Ke(f,[0,g],x),w=Ke(f,[0,g*2],x),k=Ke(f,[0,g*3],x),_=be(ne(Ws(b),tl(C)),ne(c,Ws(be(a,w)))),E=ne(tl(_),Ws(k));return[_,E]}var sv=N({basicLSTMCell_:TW});function _W(r,e,t){let o=v(r,"x","batchToSpaceND"),n=e.reduce((i,p)=>i*p);$(o.rank>=1+e.length,()=>`input rank is ${o.rank} but should be > than blockShape.length ${e.length}`),$(t.length===e.length,()=>`crops.length is ${t.length} but should be equal to blockShape.length ${e.length}`),$(o.shape[0]%n===0,()=>`input tensor batch is ${o.shape[0]} but is not divisible by the product of the elements of blockShape ${e.join(" * ")} === ${n}`);let s={x:o},a={blockShape:e,crops:t};return T.runKernel(bs,s,a)}var jm=N({batchToSpaceND_:_W});function av(r){let e;return r.rank===0||r.rank===1?e=W(r,[1,1,1,r.size]):r.rank===2?e=W(r,[1,1,r.shape[0],r.shape[1]]):r.rank===3?e=W(r,[1,r.shape[0],r.shape[1],r.shape[2]]):e=r,e}function $W(r,e,t,o,n,s){s==null&&(s=.001);let 
a=v(r,"x","batchNorm"),i=v(e,"mean","batchNorm"),p=v(t,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;o!=null&&(c=v(o,"offset","batchNorm")),$(i.rank===p.rank,()=>"Batch normalization gradient requires mean and variance to have equal ranks."),$(c==null||i.rank===c.rank,()=>"Batch normalization gradient requires mean and offset to have equal ranks."),$(u==null||i.rank===u.rank,()=>"Batch normalization gradient requires mean and scale to have equal ranks.");let m={x:av(a),scale:u,offset:c,mean:i,variance:p},d={varianceEpsilon:s},f=T.runKernel(sn,m,d);return W(f,a.shape)}var _i=N({batchNorm_:$W});function EW(r,e,t,o,n,s){let a=v(r,"x","batchNorm"),i=v(e,"mean","batchNorm"),p=v(t,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;return o!=null&&(c=v(o,"offset","batchNorm")),$(a.rank===2,()=>`Error in batchNorm2D: x must be rank 2 but got rank ${a.rank}.`),$(i.rank===2||i.rank===1,()=>`Error in batchNorm2D: mean must be rank 2 or rank 1 but got rank ${i.rank}.`),$(p.rank===2||p.rank===1,()=>`Error in batchNorm2D: variance must be rank 2 or rank 1 but got rank ${p.rank}.`),u!=null&&$(u.rank===2||u.rank===1,()=>`Error in batchNorm2D: scale must be rank 2 or rank 1 but got rank ${u.rank}.`),c!=null&&$(c.rank===2||c.rank===1,()=>`Error in batchNorm2D: offset must be rank 2 or rank 1 but got rank ${c.rank}.`),_i(a,i,p,c,u,s)}var iv=N({batchNorm2d_:EW});function AW(r,e,t,o,n,s){let a=v(r,"x","batchNorm"),i=v(e,"mean","batchNorm"),p=v(t,"variance","batchNorm"),u;n!=null&&(u=v(n,"scale","batchNorm"));let c;return o!=null&&(c=v(o,"offset","batchNorm")),$(a.rank===3,()=>`Error in batchNorm3D: x must be rank 3 but got rank ${a.rank}.`),$(i.rank===3||i.rank===1,()=>`Error in batchNorm3D: mean must be rank 3 or rank 1 but got rank ${i.rank}.`),$(p.rank===3||p.rank===1,()=>`Error in batchNorm3D: variance must be rank 3 or rank 1 but got rank ${p.rank}.`),u!=null&&$(u.rank===3||u.rank===1,()=>`Error in batchNorm3D: scale must be rank 3 or 
rank 1 but got rank ${u.rank}.`),c!=null&&$(c.rank===3||c.rank===1,()=>`Error in batchNorm3D: offset must be rank 3 or rank 1 but got rank ${c.rank}.`),_i(
${n} and ${e} for depthToSpace with input shape
${o.shape}`),$(s*e>=0,()=>`Negative dimension size caused by overflow when multiplying
${s} and ${e} for depthToSpace with input shape
${o.shape}`),$(a%(e*e)===0,()=>`Dimension size must be evenly divisible by ${e*e} but is ${a} for depthToSpace with input shape ${o.shape}`);let i={x:o},p={blockSize:e,dataFormat:t};return T.runKernel(Yo,i,p)}var Tv=N({depthToSpace_:eU});function tU(r,e,t,o,n="NHWC",s=[1,1],a){let i=v(r,"x","depthwiseConv2d","float32"),p=v(e,"filter","depthwiseConv2d","float32"),u=i,c=!1;i.rank===3&&(c=!0,u=W(i,[1,i.shape[0],i.shape[1],i.shape[2]])),$(u.rank===4,()=>`Error in depthwiseConv2d: input must be rank 4, but got rank ${u.rank}.`),$(p.rank===4,()=>`Error in depthwiseConv2d: filter must be rank 4, but got rank ${p.rank}.`);let l=n==="NHWC"?u.shape[3]:u.shape[1];$(l===p.shape[2],()=>`Error in depthwiseConv2d: number of input channels (${l}) must match the inChannels dimension in filter ${p.shape[2]}.`),Mt("depthwiseConv2d",o,a);let m={x:u,filter:p},d={strides:t,pad:o,dataFormat:n,dilations:s,dimRoundingMode:a},f=T.runKernel(Qo,m,d);return c?W(f,[f.shape[1],f.shape[2],f.shape[3]]):f}var Bp=N({depthwiseConv2d_:tU});function rU(r){let t={x:v(r,"x","diag")};return T.runKernel(si,t)}var _v=N({diag_:rU});function oU(r,e,t,o,n=[1,1],s="NHWC"){let a=v(r,"x","dilation2d"),i=v(e,"filter","dilation2d");$(a.rank===3||a.rank===4,()=>`Error in dilation2d: input must be rank 3 or 4, but got rank ${a.rank}.`),$(i.rank===3,()=>`Error in dilation2d: filter must be rank 3, but got rank ${i.rank}.`),$(s==="NHWC",()=>`Error in dilation2d: Only NHWC is currently supported, but got dataFormat of ${s}`);let p=a,u=!1;a.rank===3&&(p=W(a,[1,a.shape[0],a.shape[1],a.shape[2]]),u=!0),$(p.shape[3]===i.shape[2],()=>`Error in dilation2d: input and filter must have the same depth: ${p.shape[3]} vs ${i.shape[2]}`);let c={x:p,filter:i},l={strides:t,pad:o,dilations:n},m=T.runKernel(ai,c,l);return u?W(m,[m.shape[1],m.shape[2],m.shape[3]]):m}var $v=N({dilation2d_:oU});var yr={};Ge(yr,{assertAndGetBroadcastShape:()=>Je,getBroadcastDims:()=>Ev,getReductionAxes:()=>Qm});function Ev(r,e){let t=r.length,o=[];for(let 
n=0;n<t;n++){let s=t-1-n,a=r[s]||1;(e[e.length-1-n]||1)>1&&a===1&&o.unshift(s)}return o}function Qm(r,e){let t=[];for(let o=0;o<e.length;o++){let n=r[r.length-o-1],s=e.length-o-1,a=e[s];(n==null||n===1&&a>1)&&t.unshift(s)}return t}function Je(r,e){let t=[],o=Math.max(r.length,e.length);for(let n=0;n<o;n++){let s=r[r.length-n-1];s==null&&(s=1);let a=e[e.length-n-1];if(a==null&&(a=1),s===1)t.unshift(a);else if(a===1)t.unshift(s);else if(s!==a){let i=`Operands could not be broadcast together with shapes ${r} and ${e}.`;throw Error(i)}else t.unshift(s)}return t}function nU(r,e){let t=v(r,"a","equal","string_or_numeric"),o=v(e,"b","equal","string_or_numeric");[t,o]=De(t,o),Je(t.shape,o.shape);let n={a:t,b:o};return T.runKernel(en,n)}var Zm=N({equal_:nU});function sU(r,e,t){let o=v(e,"a","where"),n=v(t,"b","where"),s=v(r,"condition","where","bool"),a=Je(Je(s.shape,o.shape),n.shape),i=$i(s,a),p=$i(o,a),u=$i(n,a),c={condition:i,t:p,e:u};return T.runKernel($s,c)}var ss=N({where_:sU});function aU(r){let t={x:v(r,"x","zerosLike")};return T.runKernel(Os,t)}var Ut=N({zerosLike_:aU});function iU(r,e){let t=v(r,"a","div"),o=v(e,"b","div");[t,o]=De(t,o);let n=He(t,o),s=Ut(n),a=Zm(o,s);return ss(a,s,n)}var Av=N({divNoNan_:iU});function uU(r,e){let t=v(r,"t1","dot"),o=v(e,"t2","dot");$((t.rank===1||t.rank===2)&&(o.rank===1||o.rank===2),()=>`Error in dot: inputs must all be rank 1 or 2, but got ranks ${t.rank} and ${o.rank}.`);let n=t.rank===1?t.size:t.shape[1],s=o.rank===1?o.size:o.shape[0];if($(n===s,()=>`Error in dot: inner dimensions of inputs must match, but got ${n} and ${s}.`),t.rank===1&&o.rank===1){let a=W(t,[1,-1]),i=W(o,[-1,1]),p=Xe(a,i);return W(p,[])}else if(t.rank===1&&o.rank===2){let a=W(t,[1,-1]),i=W(o,[o.shape[0],o.shape[1]]),p=Xe(a,i);return W(p,[p.size])}else if(t.rank===2&&o.rank===1){let a=W(o,[-1,1]),i=Xe(t,a);return W(i,[i.size])}else{let a=W(o,[o.shape[0],o.shape[1]]);return Xe(t,a)}}var Rv=N({dot_:uU});function pU(r,...e){let 
t=e.map((n,s)=>v(n,`tensors${s}`,"einsum")),o={equation:r};return T.runKernel(ii,t,o)}var Dv=N({einsum_:pU});function cU(r
rank ${s.rank}.`),$(sa(e),()=>`Error in localResponseNormalization: depthRadius must be an integer but got depthRadius ${e}.`);let a=s,i=!1;s.rank===3&&(i=!0,a=W(s,[1,s.shape[0],s.shape[1],s.shape[2]]));let p={x:a},u={depthRadius:e,bias:t,alpha:o,beta:n},c=T.runKernel(mi,p,u);return i?W(c,[c.shape[1],c.shape[2],c.shape[3]]):c}var Gv=N({localResponseNormalization_:WU});function UU(r){let t={x:v(r,"x","log","float32")};return T.runKernel(fn,t)}var Da=N({log_:UU});function GU(r){let t={x:v(r,"x","log1p")};return T.runKernel(xa,t)}var sd=N({log1p_:GU});function HU(r){return $(gs(r),()=>"The f passed in grad(f) must be a function"),(e,t)=>{let o=v(e,"x","tf.grad","string_or_numeric"),n=t!=null?v(t,"dy","tf.grad"):null;return T.tidy(()=>{let{value:s,grads:a}=T.gradients(()=>r(o),[o],n);return n!=null&&gt(s.shape,n.shape,"The shape of dy passed in grad(f)(x, dy) must match the shape returned by f(x)"),ad(a),a[0]})}}function KU(r){return $(gs(r),()=>"The f passed in grads(f) must be a function"),(e,t)=>{$(Array.isArray(e),()=>"The args passed in grads(f)(args) must be an array of `Tensor`s or `TensorLike`s");let o=_a(e,"args","tf.grads","string_or_numeric"),n=t!=null?v(t,"dy","tf.grads"):null;return T.tidy(()=>{let{value:s,grads:a}=T.gradients(()=>r(...o),o,n);return n!=null&&gt(s.shape,n.shape,"The shape of dy passed in grads(f)([x1,...], dy) must match the shape returned by f([x1,...])"),ad(a),a})}}function qU(r){return $(gs(r),()=>"The f passed in valueAndGrad(f) must be a function"),(e,t)=>{$(e instanceof it,()=>"The x passed in valueAndGrad(f)(x) must be a tensor"),$(t==null||t instanceof it,()=>"The dy passed in valueAndGrad(f)(x, dy) must be a tensor");let{grads:o,value:n}=T.gradients(()=>r(e),[e],t);return ad(o),{grad:o[0],value:n}}}function jU(r){return $(gs(r),()=>"The f passed in valueAndGrads(f) must be a function"),(e,t)=>{$(Array.isArray(e)&&e.every(n=>n instanceof it),()=>"The args passed in valueAndGrads(f)(args) must be array of tensors"),$(t==null||t 
instanceof it,()=>"The dy passed in valueAndGrads(f)(args, dy) must be a tensor");let o=T.gradients(()=>r(...e),e,t);return t!=null&&gt(o.value.shape,t.shape,"The shape of dy passed in valueAndGrads(f)([x1,...], dy) must match the shape returned by f([x1,...])"),ad(o.grads),o}}function pC(r,e){$(gs(r),()=>"The f passed in variableGrads(f) must be a function"),$(e==null||Array.isArray(e)&&e.every(u=>u instanceof Na),()=>"The varList passed in variableGrads(f, varList) must be an array of variables");let t=e!=null;if(!t){e=[];for(let u in T.registeredVariables)e.push(T.registeredVariables[u])}let o=t?e.filter(u=>!u.trainable):null,n=e.length;e=e.filter(u=>u.trainable),$(e.length>0,()=>`variableGrads() expects at least one of the input variables to be trainable, but none of the ${n} variables is trainable.`);let s=!0,{value:a,grads:i}=T.gradients(r,e,null,s);$(i.some(u=>u!=null),()=>"Cannot find a connection between any variable and the result of the loss function y=f(x). Please make sure the operations that use variables are inside the function f passed to minimize()."),$(a.rank===0,()=>`The f passed in variableGrads(f) must return a scalar, but it returned a rank-${a.rank} tensor`);let p={};return e.forEach((u,c)=>{i[c]!=null&&(p[u.name]=i[c])}),o!=null&&o.forEach(u=>p[u.name]=null),{value:a,grads:p}}function br(r){return T.customGrad(r)}function ad(r){if(r.filter(t=>t==null).length>0)throw new Error(`Cannot compute gradient of y=f(x) with respect to x. Make sure that
the f you passed encloses all operations that lead from x to y.`)}function XU(r){let t={x:v(r,"x","neg")};return T.runKernel(vs,t)}var Cr=N({neg_:XU});function YU(r){let t={x:v(r,"x","softplus")};return T.runKernel(Ia,t)}var id=N({softplus_:YU});function QU(r){let e=v(r,"x","logSigmoid");return br(o=>({value:Cr(id(Cr(o))),gradFunc:a=>ne(a,Ws(Cr(o)))}))(e)}var Hv=N({logSigmoid_:QU});function ZU(r,e){let t=v(r,"a","sub"),o=v(e,"b","sub");[t,o]=De(t,o);let n={a:t,b:o};return T.runKernel(Yn,n)}var ke=N({sub_:ZU});function JU(r,e=-1){let t=v(r,"logits","logSoftmax");if(e===-1&&(e=t.rank-1),e!==t.rank-1)throw Error(`Log Softmax along a non-last dimension is not yet supported. Logits was rank ${t.rank} and axis was ${e}`);return br((n,s)=>{let i=Gs(n,e,!0),p=ke(n,i),u=ke(je(p,"float32"),Da(et(So(p),e,!0)));return s([u]),{value:u,gradFunc:(l,m)=>{let[d]=m,f=!0,h=So(d);return ke(l,ne(et(l,e,f),h))}}})(t)}var Kv=N({logSoftmax_:JU});function e4(r,e=null,t=!1){let o=v(r,"x","logSumExp"),n=Ja(e,o.shape),s=Gs(o,n,!0),a=ke(o,s),i=So(a),p=et(i,n),u=Da(p),c=be(W(s,u.shape),u);if(t){let l=Ea(c.shape,n);return W(c,l)}return c}var ud=N({logSumExp_:e4});function t4(r,e){let t=v(r,"a","logicalAnd","bool"),o=v(e,"b","logicalAnd","bool");Je(t.shape,o.shape);let n={a:t,b:o};return T.runKernel(hn,n)}var pu=N({logicalAnd_:t4});function r4(r){let t={x:v(r,"x","logicalNot","bool")};return T.runKernel(gn,t)}var pd=N({logicalNot_:r4});function o4(r,e){let t=v(r,"a","logicalOr","bool"),o=v(e,"b","logicalOr","bool");Je(t.shape,o.shape);let n={a:t,b:o};return T.runKernel(xn,n)}var cd=N({logicalOr_:o4});function n4(r,e){let t=v(r,"a","logicalXor","bool"),o=v(e,"b","logicalXor","bool");return Je(t.shape,o.shape),pu(cd(r,e),pd(pu(r,e)))}var qv=N({logicalXor_:n4});var ld=2147483648;function s4(r,e,t="left"){let o=v(r,"sortedSequence","searchSorted"),n=v(e,"values","searchSorted"),s=o.shape[o.shape.length-1],a=n.shape[n.shape.length-1],i=W(o,[-1,s]),p=W(n,[-1,a]);if(i.rank<2)throw new Error("Sorted 
input argument must be at least 2-dimensional");if(i.shape[0]!==p.shape[0])throw new Error("Leading dimension of 'sortedSequence' and 'values' must match.");if(We(p.shape)>=ld)throw new Error(`values tensor size must less than ${ld}`);if(i.shape[1]>=ld)throw new Error(`trailing dim_size must less than ${ld} for int32 output type, was ${i.shape[1]}`);let u={sortedSequence:i,values:p},c={side:t};return T.runKernel(fi,u,c)}var ol=N({searchSorted_:s4});function jv(r,e){return ol(r,e,"left")}function a4(r,e,t,o,n){let s=v(r,"x","maxPool"),a=1,i=s,p=!1;s.rank===3&&(p=!0,i=W(s,[1,s.shape[0],s.shape[1],s.shape[2]])),$(i.rank===4,()=>`Error in maxPool: input must be rank 4 but got rank ${i.rank}.`),$(mr(t,a),()=>`Error in maxPool: Either strides or dilations must be 1. Got strides ${t} and dilations '${a}'`),Mt("maxPool",o,n);let u={x:i},c={filterSize:e,strides:t,pad:o,dimRoundingMode:n},l=T.runKernel(Cn,u,c);return p?W(l,[l.shape[1],l.shape[2],l.shape[3]]):l}var md=N({maxPool_:a4});function i4(r,e=[1,1,1],t,o,n,s="NDHWC"){let a=v(r,"x","maxPool3d"),i=a,p=!1;a.rank===4&&(p=!0,i=W(a,[1,a.shape[0],a.shape[1],a.shape[2],a.shape[3]])),$(i.rank===5,()=>`Error in maxPool3d: x must be rank 5 but got rank ${i.rank}.`),$(s==="NDHWC",()=>`Error in maxPool3d: Only NDHWC is currently supported, but got dataFormat of ${s}`),Mt("maxPool3d",o,n);let u={x:i},c={filterSize:e,strides:t,pad:o,dimRoundingMode:n,dataFormat:s},l=T.runKernel(Cp,u,c);return p?W(l,[l.shape[1],l.shape[2],l.shape[3],l.shape[4]]):l}var Xv=N({maxPool3d_:i4});function u4(r,e,t,o,n=!1){let a={x:v(r,"x","maxPoolWithArgmax")},i={filterSize:e,strides:t,pad:o,includeBatchInIndex:n},p=T.runKernel(Sp,a,i);return{result:p[0],indexes:p[1]}}var Yv=N({maxPoolWithArgmax_:u4});function p4(r,e){let t=v(r,"a","maximum"),o=v(e,"b","maximum");[t,o]=De(t,o),t.dtype==="bool"&&(t=je(t,"int32"),o=je(o,"int32")),Je(t.shape,o.shape);let n={a:t,b:o};return T.runKernel(bn,n)}var dd=N({maximum_:p4});function c4(r,e=null,t=!1){let 
n={x:v(r,"x","mean")},s={axis:e,keepDims:t};return T.runKernel(Sn,n,s)}var cu=N({mean_:c4});function Br(r,e="flo
2022-11-18 17:13:29 +01:00
Actual: ${n}.
Expected: ${s}.`);for(let a=0;a<s.length;++a){let i=n[a],p=s[a];if(!t(i,p))throw new Error(`Arrays differ: actual[${a}] = ${i}, expected[${a}] = ${p}.
Actual: ${n}.
2023-01-06 19:23:06 +01:00
Expected: ${s}.`)}typeof expect!="undefined"&&expect().nothing()}function G4(r,e){r().then(()=>e.fail(),()=>e()),typeof expect!="undefined"&&expect().nothing()}function H4(r,e){let t=typeof e=="string"||typeof e=="number"||typeof e=="boolean"?[e]:e;return Oo(r)||Oo(r[0])||Oo(e)||Oo(e[0])?xC(r,t,(o,n)=>o==n):xC(r,e,(o,n)=>yC(o,n,0))}function Rk(r,e,t){if(t==null&&(t=bd()),!yC(r,e,t))throw new Error(`Numbers differ: actual === ${r}, expected === ${e}`);typeof expect!="undefined"&&expect().nothing()}function yC(r,e,t){return!isFinite(r)&&!isFinite(e)?!0:!(isNaN(r)||isNaN(e)||Math.abs(r-e)>t)}function K4(r,e,t){for(let o=0;o<r.length;o++)if(r[o]<e||r[o]>t)throw new Error(`Value out of range:${r[o]} low: ${e}, high: ${t}`)}function q4(r,e){let t=new Float32Array(r),o=new Float32Array(e);if(t.length!==o.length)throw new Error(`Expected ArrayBuffer to be of length ${o.length}, but it was ${t.length}`);for(let n=0;n<o.length;n++)if(t[n]!==o[n])throw new Error(`Expected ArrayBuffer value at ${n} to be ${o[n]} but got ${t[n]} instead`)}function Dk(r){for(let e=0;e<r.length;e++){let t=r[e];Array.isArray(t)?Dk(t):r[e]=Ii(t)}return r}function j4(r){let e=document.createElement("video");return"playsInline"in e&&(e.playsInline=!0),e.muted=!0,e.loop=!0,e.style.position="fixed",e.style.left="0px",e.style.top="0px",e.preload="auto",e.appendChild(r),new Promise(t=>{e.addEventListener("loadeddata",o=>t(e)),e.load()})}async function X4(r){await r.play(),"requestVideoFrameCallback"in r&&await new Promise(e=>{r.requestVideoFrameCallback(e)})}var mu=class{constructor(e,t,o,n,s){this.mean=e,this.stdDev=t,this.dtype=o,this.nextVal=NaN,this.truncated=n,this.truncated&&(this.upper=this.mean+this.stdDev*2,this.lower=this.mean-this.stdDev*2);let a=s||Math.random();this.random=wd.alea(a.toString())}nextValue(){if(!isNaN(this.nextVal)){let n=this.nextVal;return this.nextVal=NaN,n}let e,t,o=!1;for(;!o;){let n,s,a;do n=2*this.random()-1,s=2*this.random()-1,a=n*n+s*s;while(a>=1||a===0);let 
i=Math.sqrt(-2*Math.log(a)/a);e=this.mean+this.stdDev*n*i,t=this.mean+this.stdDev*s*i,(!this.truncated||this.isValidTruncated(e))&&(o=!0)}return(!this.truncated||this.isValidTruncated(t))&&(this.nextVal=this.convertValue(t)),this.convertValue(e)}convertValue(e){return this.dtype==null||this.dtype==="float32"?e:Math.round(e)}isValidTruncated(e){return e<=this.upper&&e>=this.lower}},Cd=class{constructor(e,t,o,n){this.alpha=e,this.beta=1/t,this.dtype=o;let s=n||Math.random();this.randu=wd.alea(s.toString()),this.randn=new mu(0,1,o,!1,this.randu()),e<1?this.d=e+2/3:this.d=e-1/3,this.c=1/Math.sqrt(9*this.d)}nextValue(){let e,t,o,n,s,a;for(;;){do n=this.randn.nextValue(),a=1+this.c*n;while(a<=0);if(a*=a*a,e=n*n,t=1-.331*e*e,o=.5*e+this.d*(1-a+Math.log(a)),s=this.randu(),s<t||Math.log(s)<o)break}return a=1/this.beta*this.d*a,this.alpha<1&&(a*=Math.pow(this.randu(),1/this.alpha)),this.convertValue(a)}convertValue(e){return this.dtype==="float32"?e:Math.round(e)}},Sd=class{constructor(e=0,t=1,o,n){if(this.canReturnFloat=()=>this.dtype==null||this.dtype==="float32",this.min=e,this.range=t-e,this.dtype=o,n==null&&(n=Math.random()),typeof n=="number"&&(n=n.toString()),!this.canReturnFloat()&&this.range<=1)throw new Error(`The difference between ${e} - ${t} <= 1 and dtype is not float`);this.random=wd.alea(n)}convertValue(e){return this.canReturnFloat()?e:Math.round(e)}nextValue(){return this.convertValue(this.min+this.range*this.random())}};function Y4(r,e,t=1,o="float32",n){if(bt(r),t==null&&(t=1),o==null&&(o="float32"),o!=="float32"&&o!=="int32")throw new Error(`Unsupported data type ${o}`);let s=new Cd(e,t,o,n),a=le(r,o);for(let i=0;i<a.values.length;i++)a.values[i]=s.nextValue();return a.toTensor()}var Ok=N({randomGamma_:Y4});function Q4(r,e=0,t=1,o,n){if(bt(r),o!=null&&o==="bool")throw new Error(`Unsupported data type ${o}`);let s=new mu(e,t,o,!1,n),a=le(r,o);for(let i=0;i<a.values.length;i++)a.values[i]=s.nextValue();return a.toTensor()}var 
Id=N({randomNormal_:Q4});function Z4(r,e,t){if(e!=null&&e==="bool")throw new Error(`Unsupported data type ${e}`);return Id(r,0,1,e,t)}va
${n.shape}`);if(s.rank!==1)throw new Error(`Values should be Tensor1D but received shape ${s.shape}`);if(a.rank!==1)throw new Error(`Dense shape should be Tensor1D but received shape ${a.shape}`);if(i.rank!==0)throw new Error(`Default value should be a scalar but received shape ${i.shape}`);let p={indices:n,values:s,denseShape:a,defaultValue:i},u=T.runKernel(hi,p);return{outputIndices:u[0],outputValues:u[1],emptyRowIndicator:u[2],reverseIndexMap:u[3]}}var Y1=N({sparseFillEmptyRows_:HH});function KH(r,e,t){let o=v(r,"inputIndices","sparseReshape","int32"),n=v(e,"inputShape","sparseReshape","int32"),s=v(t,"newShape","sparseReshape","int32");if(o.rank!==2)throw new Error(`Input indices should be Tensor2D but received shape
${o.shape}`);if(n.rank!==1)throw new Error(`Input shape should be Tensor1D but received shape ${n.shape}`);if(s.rank!==1)throw new Error(`New shape should be Tensor1D but received shape ${s.shape}`);let a={inputIndices:o,inputShape:n,newShape:s},i=T.runKernel(va,a);return{outputIndices:i[0],outputShape:i[1]}}var Q1=N({sparseReshape_:KH});function qH(r,e,t){let o=v(r,"data","sparseSegmentMean"),n=v(e,"indices","sparseSegmentMean","int32"),s=v(t,"segmentIds","sparseSegmentMean","int32");if(o.rank<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.rank!==1)throw new Error(`Indices should be Tensor1D but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.rank!==1)throw new Error(`Segment ids should be Tensor1D but received shape
2023-01-06 19:23:06 +01:00
${s.shape}`);let a={data:o,indices:n,segmentIds:s};return T.runKernel(gi,a)}var Z1=N({sparseSegmentMean_:qH});function jH(r,e,t){let o=v(r,"data","sparseSegmentSum"),n=v(e,"indices","sparseSegmentSum","int32"),s=v(t,"segmentIds","sparseSegmentSum","int32");if(o.rank<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.rank!==1)throw new Error(`Indices should be Tensor1D but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.rank!==1)throw new Error(`Segment ids should be Tensor1D but received shape
2023-01-06 19:23:06 +01:00
${s.shape}`);let a={data:o,indices:n,segmentIds:s};return T.runKernel(xi,a)}var J1=N({sparseSegmentSum_:jH});function XH(r,e,t,o,n,s,a,i){let p=v(r,"data","stringNGrams","string");if(p.dtype!=="string")throw new Error("Data must be of datatype string");if(p.shape.length!==1)throw new Error(`Data must be a vector, saw: ${p.shape}`);let u=v(e,"dataSplits","stringNGrams");if(u.dtype!=="int32")throw new Error("Data splits must be of datatype int32");let c={separator:t,nGramWidths:o,leftPad:n,rightPad:s,padWidth:a,preserveShortSequences:i},l={data:p,dataSplits:u},m=T.runKernel(Ds,l,c);return{nGrams:m[0],nGramsSplits:m[1]}}var eN=N({stringNGrams_:XH});function YH(r,e,t=!0){let o=v(r,"input","stringSplit","string"),n=v(e,"delimiter","stringSplit","string");if(o.rank!==1)throw new Error(`Input should be Tensor1D but received shape ${o.shape}`);if(n.rank!==0)throw new Error(`Delimiter should be a scalar but received shape ${n.shape}`);let s={skipEmpty:t},a={input:o,delimiter:n},i=T.runKernel(Ci,a,s);return{indices:i[0],values:i[1],shape:i[2]}}var tN=N({stringSplit_:YH});function QH(r,e){let t=v(r,"input","stringToHashBucketFast","string"),o={numBuckets:e};if(e<=0)throw new Error("Number of buckets must be at least 1");let n={input:t};return T.runKernel(Si,n,o)}var rN=N({stringToHashBucketFast_:QH});var 
ZH={fft:zp,ifft:du,rfft:Wp,irfft:Td},JH={hammingWindow:C1,hannWindow:Fd,frame:Od,stft:S1},eK={flipLeftRight:I1,grayscaleToRGB:v1,resizeNearestNeighbor:O1,resizeBilinear:F1,rotateWithOffset:k1,cropAndResize:w1,nonMaxSuppression:N1,nonMaxSuppressionAsync:$1,nonMaxSuppressionWithScore:E1,nonMaxSuppressionWithScoreAsync:A1,nonMaxSuppressionPadded:R1,nonMaxSuppressionPaddedAsync:D1,threshold:P1,transform:M1},tK={bandPart:L1,gramSchmidt:B1,qr:z1},rK={absoluteDifference:W1,computeWeightedLoss:ar,cosineDistance:U1,hingeLoss:G1,huberLoss:H1,logLoss:K1,meanSquaredError:q1,sigmoidCrossEntropy:j1,softmaxCrossEntropy:X1},oK={sparseFillEmptyRows:Y1,sparseReshape:Q1,sparseSegmentMean:Z1,sparseSegmentSum:J1},nK={stringNGrams:eN,stringSplit:tN,stringToHashBucketFast:rN};var oN={};Ge(oN,{Serializable:()=>il,SerializationMap:()=>qs,registerClass:()=>IC});var il=class{getClassName(){return this.constructor.className}static fromConfig(e,t){return new e(t)}},qs=class{constructor(){this.classNameMap={}}static getMap(){return qs.instance==null&&(qs.instance=new qs),qs.instance}static register(e){qs.getMap().classNameMap[e.className]=[e,e.fromConfig]}};function IC(r){$(r.className!=null,()=>"Class being registered does not have the static className property defined."),$(typeof r.className=="string",()=>"className is required to be a string, but got type "+typeof r.className),$(r.className.length>0,()=>"Class being registered has an empty-string as its className, which is disallowed."),qs.register(r)}var wr=class extends il{minimize(e,t=!1,o){let{value:n,grads:s}=this.computeGradients(e,o);if(o!=null){let a=o.map(i=>({name:i.name,tensor:s[i.name]}));this.applyGradients(a)}else this.applyGradients(s);return Ot(s),t?n:(n.dispose(),null)}get iterations(){return this.iterations_==null&&(this.iterations_=0),this.iterations_}incrementIterations(){this.iterations_=this.iterations+1}computeGradients(e,t){return pC(e,t)}dispose(){this.iterations_!=null&&Ot(this.iterations_)}async 
saveIterations(){return this.iterations_==null&&(this.iterations_=0),{name:"iter",tensor:Ce(this.iterations_,"int32")}}async getWeights(){throw new Error("getWeights() is not implemented for this optimizer yet.")}async setWeights(e){throw new Error(`setWeights() is not implemented for this optimizer class ${this.getClassName()}`)}async extractIterations(e){return this.iterations_=(await e[0].tensor.data())[0],e.slice(1)}};Object.defineProperty(wr,Symbol.hasInstance,{value:r=>r.minimize!=null&&r.computeGradients!=null&&r.applyGradients!=null});var yu=class extends wr{constructor(e,t,o=null){super(),this.learningRate=e,this.rho=t,this.epsilon=o,this.accumulatedGrads=[],this.accumulatedUpdates=[],o==null&&(this.epsilon=T.backend.epsilon())}static get className(){return"Adadelta"}appl
2023-01-07 21:50:37 +01:00
Manifest JSON has weights with names: ${i.join(", ")}.`)}let p=n.reduce((d,f,h)=>(f&&d.push(h),d),[]),u=[];p.forEach(d=>{e[d].paths.forEach(f=>{let h=t+(t.endsWith("/")?"":"/")+f;u.push(h)})});let c=await r(u),l={},m=0;return p.forEach(d=>{let f=e[d].paths.length,h=0;for(let w=0;w<f;w++)h+=c[m+w].byteLength;let g=new ArrayBuffer(h),x=new Uint8Array(g),b=0;for(let w=0;w<f;w++){let k=new Uint8Array(c[m+w]);x.set(k,b),b+=k.byteLength}s[d].forEach(w=>{let k=g.slice(w.groupOffset,w.groupOffset+w.sizeBytes),_=Bm(k,[w.manifestEntry]);for(let E in _)l[E]=_[E]}),m+=f}),l}}var lK="application/octet-stream",mK="application/json",ul=class{constructor(e,t){if(this.DEFAULT_METHOD="POST",t==null&&(t={}),this.weightPathPrefix=t.weightPathPrefix,this.onProgress=t.onProgress,this.weightUrlConverter=t.weightUrlConverter,t.fetchFunc!=null?($(typeof t.fetchFunc=="function",()=>"Must pass a function that matches the signature of `fetch` (see https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API)"),this.fetch=t.fetchFunc):this.fetch=O().platform.fetch,$(e!=null&&e.length>0,()=>"URL path for http must not be null, undefined or empty."),Array.isArray(e)&&$(e.length===2,()=>`URL paths for http must have a length of 2, (actual length is ${e.length}).`),this.path=e,t.requestInit!=null&&t.requestInit.body!=null)throw new Error("requestInit is expected to have no pre-existing body, but has one.");this.requestInit=t.requestInit||{}}async save(e){if(e.modelTopology instanceof ArrayBuffer)throw new Error("BrowserHTTPRequest.save() does not support saving model topology in binary formats yet.");let t=Object.assign({method:this.DEFAULT_METHOD},this.requestInit);t.body=new FormData;let o=[{paths:["./model.weights.bin"],weights:e.weightSpecs}],n=Vm(e,o);t.body.append("model.json",new Blob([JSON.stringify(n)],{type:mK}),"model.json"),e.weightData!=null&&t.body.append("model.weights.bin",new Blob([e.weightData],{type:lK}),"model.weights.bin");let s=await 
this.fetch(this.path,t);if(s.ok)return{modelArtifactsInfo:Ms(e),responses:[s]};throw new Error(`BrowserHTTPRequest.save() failed due to HTTP response status ${s.status}.`)}async load(){let e=await this.fetch(this.path,this.requestInit);if(!e.ok)throw new Error(`Request to ${this.path} failed with status code ${e.status}. Please verify this URL points to the model JSON of the model to load.`);let t;try{t=await e.json()}catch(s){let a=`Failed to parse model JSON of response from ${this.path}.`;throw this.path.endsWith(".pb")?a+=" Your path contains a .pb file extension. Support for .pb models have been removed in TensorFlow.js 1.0 in favor of .json models. You can re-convert your Python TensorFlow model using the TensorFlow.js 1.0 conversion scripts or you can convert your.pb models with the 'pb2json'NPM script in the tensorflow/tfjs-converter repository.":a+=" Please make sure the server is serving valid JSON for this request.",new Error(a)}let o=t.modelTopology,n=t.weightsManifest;if(o==null&&n==null)throw new Error(`The JSON from HTTP path ${this.path} contains neither model topology or manifest for weights.`);return Op(t,s=>this.loadWeights(s))}async loadWeights(e){let t=Array.isArray(this.path)?this.path[1]:this.path,[o,n]=dK(t),s=this.weightPathPrefix||o,a=zm(e),i=[],p=[];for(let c of e)for(let l of c.paths)this.weightUrlConverter!=null?p.push(this.weightUrlConverter(l)):i.push(s+l+n);this.weightUrlConverter&&i.push(...await Promise.all(p));let u=await NC(i,{requestInit:this.requestInit,fetchFunc:this.fetch,onProgress:this.onProgress});return[a,Fp(u)]}};ul.URL_SCHEME_REGEX=/^https?:\/\//;function dK(r){let e=r.lastIndexOf("/"),t=r.lastIndexOf("?"),o=r.substring(0,e),n=t>e?r.substring(t):"";return[o+"/",n]}function Bd(r){return r.match(ul.URL_SCHEME_REGEX)!=null}var uN=(r,e)=>{if(typeof fetch=="undefined"&&(e==null||e.fetchFunc==null))return null;{let t=!0;if(Array.isArray(r)?t=r.every(o=>Bd(o)):t=Bd(r),t)return Vd(r,e)}return 
null};mt.registerSaveRouter(uN);mt.registerLoadRouter(uN);function Vd(r,e){return new ul(r,e)}function pN(r,e){return Vd(r,e)}var pl=class{constructor(e){this.modelArtifacts=e}
2023-01-06 19:23:06 +01:00
indices.shape[0] = ${r}`}function yq(r,e){return`indices(${r}, 0) is invalid: ${e} < 0`}function bq(r,e,t){return`indices(${r}, 0) is invalid: ${e} >= ${t}`}function Cq(r,e){return`only one output dimension may be -1, not both ${r} and ${e}`}function Sq(r,e){return`size ${r} must be non-negative, not ${e}`}function wq(){return"reshape cannot infer the missing input size for an empty tensor unless all specified input sizes are non-zero"}function Iq(r,e){let t=We(r),o=We(e);return`Input to reshape is a SparseTensor with ${t}
dense values, but the requested shape requires a multiple of ${o}. inputShape=${r} outputShape= ${e}`}function vq(r,e){let t=We(r),o=We(e);return`Input to reshape is a tensor with ${t} dense values, but the requested shape has ${o}. inputShape=${r} outputShape=${e}`}function kq(){return"segment ids must be >= 0"}function Nq(){return"segment ids are not increasing"}function Tq(r,e){return`Segment id ${r} out of range [0, ${e}), possibly because segmentIds input is not sorted.`}function _q(r,e,t){return`Bad: indices[${r}] == ${e} out of range [0, ${t})`}var DC={};Ge(DC,{collectGatherOpShapeInfo:()=>Aq,computeOutShape:()=>Eq,segOpComputeOptimalWindowSize:()=>$q});function $q(r,e){let t=!1,o;for(r<=Ud?(o=r,t=!0):o=cp(r,Math.floor(Math.sqrt(r)));!t;)o>e||o===r?t=!0:o=cp(r,o+1);return o}function Eq(r,e,t){let o=[],n=r.length;for(let s=0;s<n;s++)s!==e?o.push(r[s]):o.push(t);return o}function Aq(r,e,t,o){let n=e.shape.length,s=r.shape.length;if(o!==0&&(o<-n||o>n))throw new Error(`Expect batchDims in the range of [-${n}, ${n}], but got ${o}`);if(o<0&&(o+=n),o>s)throw new Error(`batchDims (${o}) must be less than rank(x) (
2023-01-07 21:50:37 +01:00
${s}).`);if(t<o)throw new Error(`batchDims (${o}) must be less than or equal to axis (${t}).`);for(let l=0;l<o;++l)if(r.shape[l]!==e.shape[l])throw new Error(`x.shape[${l}]: ${r.shape[l]} should be equal to indices.shape[${l}]: ${e.shape[l]}.`);let a=r.shape[t],i=[],p=1,u=1,c=1;for(let l=0;l<o;++l)i.push(r.shape[l]),p*=r.shape[l];for(let l=o;l<t;l++)i.push(r.shape[l]),u*=r.shape[l];for(let l=o;l<n;l++)i.push(e.shape[l]);for(let l=t+1;l<s;l++)i.push(r.shape[l]),c*=r.shape[l];return{batchSize:p,sliceSize:c,outerSize:u,dimSize:a,outputShape:i}}function Rq(r){try{return r.map(e=>Rp(e))}catch(e){throw new Error(`Failed to decode encoded string bytes into utf-8, error: ${e}`)}}function Dq(r){return r.map(e=>Ii(e))}var Vt={};Ge(Vt,{nonMaxSuppressionV3Impl:()=>Pd,nonMaxSuppressionV4Impl:()=>Md,nonMaxSuppressionV5Impl:()=>Ld,whereImpl:()=>Ad});nN();var Fq=O();Fq.registerFlag("KEEP_INTERMEDIATE_TENSORS",()=>!1,r=>{r&&console.warn("Keep intermediate tensors is ON. This will print the values of all intermediate tensors during model inference. Not all models support this mode. For details, check e2e/benchmarks/ model_config.js. 
This significantly impacts performance.")});var ao;(function(r){r[r.DT_INVALID=0]="DT_INVALID",r[r.DT_FLOAT=1]="DT_FLOAT",r[r.DT_DOUBLE=2]="DT_DOUBLE",r[r.DT_INT32=3]="DT_INT32",r[r.DT_UINT8=4]="DT_UINT8",r[r.DT_INT16=5]="DT_INT16",r[r.DT_INT8=6]="DT_INT8",r[r.DT_STRING=7]="DT_STRING",r[r.DT_COMPLEX64=8]="DT_COMPLEX64",r[r.DT_INT64=9]="DT_INT64",r[r.DT_BOOL=10]="DT_BOOL",r[r.DT_QINT8=11]="DT_QINT8",r[r.DT_QUINT8=12]="DT_QUINT8",r[r.DT_QINT32=13]="DT_QINT32",r[r.DT_BFLOAT16=14]="DT_BFLOAT16",r[r.DT_QINT16=15]="DT_QINT16",r[r.DT_QUINT16=16]="DT_QUINT16",r[r.DT_UINT16=17]="DT_UINT16",r[r.DT_COMPLEX128=18]="DT_COMPLEX128",r[r.DT_HALF=19]="DT_HALF",r[r.DT_RESOURCE=20]="DT_RESOURCE",r[r.DT_VARIANT=21]="DT_VARIANT",r[r.DT_UINT32=22]="DT_UINT32",r[r.DT_UINT64=23]="DT_UINT64",r[r.DT_FLOAT_REF=101]="DT_FLOAT_REF",r[r.DT_DOUBLE_REF=102]="DT_DOUBLE_REF",r[r.DT_INT32_REF=103]="DT_INT32_REF",r[r.DT_UINT8_REF=104]="DT_UINT8_REF",r[r.DT_INT16_REF=105]="DT_INT16_REF",r[r.DT_INT8_REF=106]="DT_INT8_REF",r[r.DT_STRING_REF=107]="DT_STRING_REF",r[r.DT_COMPLEX64_REF=108]="DT_COMPLEX64_REF",r[r.DT_INT64_REF=109]="DT_INT64_REF",r[r.DT_BOOL_REF=110]="DT_BOOL_REF",r[r.DT_QINT8_REF=111]="DT_QINT8_REF",r[r.DT_QUINT8_REF=112]="DT_QUINT8_REF",r[r.DT_QINT32_REF=113]="DT_QINT32_REF",r[r.DT_BFLOAT16_REF=114]="DT_BFLOAT16_REF",r[r.DT_QINT16_REF=115]="DT_QINT16_REF",r[r.DT_QUINT16_REF=116]="DT_QUINT16_REF",r[r.DT_UINT16_REF=117]="DT_UINT16_REF",r[r.DT_COMPLEX128_REF=118]="DT_COMPLEX128_REF",r[r.DT_HALF_REF=119]="DT_HALF_REF",r[r.DT_RESOURCE_REF=120]="DT_RESOURCE_REF",r[r.DT_VARIANT_REF=121]="DT_VARIANT_REF",r[r.DT_UINT32_REF=122]="DT_UINT32_REF",r[r.DT_UINT64_REF=123]="DT_UINT64_REF"})(ao||(ao={}));var $N;(function(r){let e;(function(t){t[t.LEGACY=0]="LEGACY",t[t.V1=1]="V1",t[t.V2=2]="V2"})(e=r.CheckpointFormatVersion||(r.CheckpointFormatVersion={}))})($N||($N={}));var OC={};function Pq(r,e){let t={tfOpName:r,category:"custom",inputs:[],attrs:[],customExecutor:e};OC[r]=t}function Gd(r){return 
OC[r]}function Mq(r){delete OC[r]}function I(r,e,t,o,n){let s=e.inputParams[r];if(s&&s.inputIndexStart!==void 0){let i=s.inputIndexStart,p=s.inputIndexEnd===0?void 0:s.inputIndexEnd===void 0?i+1:s.inputIndexEnd;if(s.type==="tensor")return Gt(e.inputNames[s.inputIndexStart],t,o,n);if(s.type==="tensors")return e.inputNames.slice(i,p).map(m=>Gt(m,t,o,n));let u=Gt(e.inputNames.slice(i)[0],t,o,n),c=u.dataSync();return s.type==="number"?c[0]:y.toNestedArray(u.shape,c)}let a=e.attrParams[r];return a&&a.value}function Gt(r,e,t,o){let[n,s]=Ir(r);if(o!=null){let i=o.getHashTableHandleByName(n);if(i!=null)return i}let a=t.currentContextIds.find(i=>!!e[Hd(n,i)]);return a!==void 0?e[Hd(n,a)][s]:void 0}function EN(r,e,t){return e[Hd(r,t.currentContextId)]}function as(r,e){let[t,o,n]=Ir(r);return[Hd(t,e&&e.currentContextId),o,n]}function Hd(r,e){return e?`${r}-${e}`:r}function Ir(r){let e=r.split(":");if(e.length===1)return[r,0,void 0];let t=e[0],o=e.length===3?e[1]:
because the value dtype is ${t.dtype}, but TensorArray dtype is ${this.dtype}.`);if(this.size()===0&&(this.elementShape==null||this.elementShape.length===0)&&(this.elementShape=t.shape),Vr(this.elementShape,t.shape,`TensorArray ${this.name}: Could not write to TensorArray index ${e}.`),o.read)throw new Error(`TensorArray ${this.name}: Could not write to TensorArray index ${e}, because it has already been read.`);if(o.written)throw new Error(`TensorArray ${this.name}: Could not write to TensorArray index ${e}, because it has already been written.`);o.tensor=t,_r(t),o.written=!0,this.tensors[e]=o}writeMany(e,t){if(e.length!==t.length)throw new Error(`TensorArray ${this.name}: could not write multiple tensors,because the index size: ${e.length} is not the same as tensors size: ${t.length}.`);e.forEach((o,n)=>this.write(o,t[n]))}gather(e,t){if(t&&t!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but gather requested dtype ${t}`);if(e)e=e.slice(0,this.size());else{e=[];for(let n=0;n<this.size();n++)e.push(n)}if(e.length===0)return nr([],[0].concat(this.elementShape));let o=this.readMany(e);return Vr(this.elementShape,o[0].shape,"TensorArray shape mismatch: "),Sr(o,0)}concat(e){if(e&&e!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but concat requested dtype ${e}`);if(this.size()===0)return nr([],[0].concat(this.elementShape));let t=[];for(let n=0;n<this.size();n++)t.push(n);let o=this.readMany(t);return Vr(this.elementShape,o[0].shape,`TensorArray shape mismatch: tensor array shape (${this.elementShape}) vs first tensor shape (${o[0].shape})`),xt(o,0)}scatter(e,t){if(t.dtype!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but tensor has dtype ${t.dtype}`);if(e.length!==t.shape[0])throw new Error(`Expected len(indices) == tensor.shape[0], but saw: ${e.length} vs. ${t.shape[0]}`);let o=Math.max(...e);if(!this.dynamicSize&&o>=this.maxSize)throw new Error(`Max index must be < array size (${o} vs. 
${this.maxSize})`);this.writeMany(e,so(t,0))}split(e,t){if(t.dtype!==this.dtype)throw new Error(`TensorArray dtype is ${this.dtype} but tensor has dtype ${t.dtype}`);let o=0,n=e.map(p=>(o+=p,o));if(o!==t.shape[0])throw new Error(`Expected sum of lengths to be equal to
2022-11-18 17:13:29 +01:00
tensor.shape[0], but sum of lengths is
2023-01-07 21:50:37 +01:00
${o}, and tensor's shape is: ${t.shape}`);if(!this.dynamicSize&&e.length!==this.maxSize)throw new Error(`TensorArray's size is not equal to the size of lengths (${this.maxSize} vs. ${e.length}), and the TensorArray is not marked as dynamically resizeable`);let s=o===0?0:t.size/o,a=[];Ee(()=>{t=W(t,[1,o,s]);for(let p=0;p<e.length;++p){let c=[0,p===0?0:n[p-1],0],l=[1,e[p],s];a[p]=W(Ke(t,c,l),this.elementShape)}return a});let i=[];for(let p=0;p<e.length;p++)i[p]=p;this.writeMany(i,a)}};var Ba=class{constructor(e,t,o,n=-1){this.tensors=e,this.elementShape=t,this.elementDtype=o,e!=null&&e.forEach(s=>{if(o!==s.dtype)throw new Error(`Invalid data types; op elements ${o}, but list elements ${s.dtype}`);Vr(t,s.shape,"TensorList shape mismatch: "),_r(s)}),this.idTensor=Ce(0),this.maxNumElements=n,_r(this.idTensor)}get id(){return this.idTensor.id}copy(){return new Ba([...this.tensors],this.elementShape,this.elementDtype)}clearAndClose(e){this.tensors.forEach(t=>{(e==null||!e.has(t.id))&&t.dispose()}),this.tensors.length=0,this.idTensor.dispose()}size(){return this.tensors.length}stack(e,t,o=-1){if(t!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t}, but list elements ${this.elementDtype}`);if(o!==-1&&this.tensors.length!==o)throw new Error(`Operation expected a list with ${o} elements but got a list with ${this.tensors.length} elements.`);Vr(e,this.elementShape,"TensorList shape mismatch: ");let n=Hp(this.elementShape,this.tensors,e);return Ee(()=>{let s=this.tensors.map(a=>W(a,n));return Sr(s,0)})}popBack(e,t){if(t!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t}, but list elements ${this.elementDtype}`);if(this.size()===0)throw new Error("Trying to pop from an empty list.");let o=Hp(this.elementShape,this.tensors,e),n=this.tensors.pop();return n.kept=!1,Vr(n.shape,e,"TensorList shape mismatch: "),W(n,o)}pushBack(e){if(e.dtype!==this.elementDtype)throw new Error(`Invalid data types; op elements ${e.dtype}, but list elements 
${this.elementDtype}`);if(Vr(e.shape,this.elementShape,"TensorList shape mismatch: "),this.maxNumElements===this.size())throw new Error("Trying to push element into a full list.");_r(e),this.tensors.push(e)}resize(e){if(e<0)throw new Error(`TensorListResize expects size to be non-negative. Got: ${e}`);if(this.maxNumElements!==-1&&e>this.maxNumElements)throw new Error(`TensorListResize input size ${e} is greater maxNumElement ${this.maxNumElements}.`);let t=new Ba([],this.elementShape,this.elementDtype,this.maxNumElements);t.tensors.length=e;for(let o=0;o<Math.min(this.tensors.length,e);++o)t.tensors[o]=this.tensors[o];return t}getItem(e,t,o){if(o!==this.elementDtype)throw new Error(`Invalid data types; op elements ${o}, but list elements ${this.elementDtype}`);if(e<0||e>this.tensors.length)throw new Error(`Trying to access element ${e} in a list with ${this.tensors.length} elements.`);if(this.tensors[e]==null)throw new Error(`element at index ${e} is null.`);Vr(this.tensors[e].shape,t,"TensorList shape mismatch: ");let n=Hp(this.elementShape,this.tensors,t);return W(this.tensors[e],n)}setItem(e,t){if(t.dtype!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t.dtype}, but list elements ${this.elementDtype}`);if(e<0||this.maxNumElements!==-1&&e>=this.maxNumElements)throw new Error(`Trying to set element ${e} in a list with max ${this.maxNumElements} elements.`);Vr(this.elementShape,t.shape,"TensorList shape mismatch: "),_r(t),this.tensors[e]!=null&&(this.tensors[e].kept=!1),this.tensors[e]=t}gather(e,t,o){if(t!==this.elementDtype)throw new Error(`Invalid data types; op elements ${t}, but list elements ${this.elementDtype}`);Vr(this.elementShape,o,"TensorList shape mismatch: "),e=e.slice(0,this.size());let n=Hp(this.elementShape,this.tensors,o);return e.length===0?nr([],[0].concat(n)):Ee(()=>{let s=e.map(a=>W(this.tensors[a],n));return Sr(s,0)})}concat(e,t){if(e&&e!==this.elementDtype)throw new Error(`TensorList dtype is ${this.elementDtype} but 
concat requested dtype ${e}`);Vr(this.elementShape,t,"TensorList shape mismatch: ");let o=H
2022-11-18 17:13:29 +01:00
tensor.shape[0], but sum of lengths is
2023-01-07 21:50:37 +01:00
${o}, and tensor's shape is: ${r.shape}`);let s=r.shape.slice(1),a=of(s,t),i=o===0?0:r.size/o,p=Ee(()=>{let c=[];r=W(r,[1,o,i]);for(let l=0;l<e.length;++l){let d=[0,l===0?0:n[l-1],0],f=[1,e[l],i];c[l]=W(Ke(r,d,f),a)}return r.dispose(),c}),u=new Ba([],t,r.dtype,e.length);for(let c=0;c<p.length;c++)u.setItem(c,p[c]);return u}var zN=async(r,e,t)=>{switch(r.op){case"If":case"StatelessIf":{let o=I("thenBranch",r,e,t),n=I("elseBranch",r,e,t),s=I("cond",r,e,t),a=I("args",r,e,t);return(await s.data())[0]?t.functionMap[o].executeFunctionAsync(a,t.tensorArrayMap,t.tensorListMap):t.functionMap[n].executeFunctionAsync(a,t.tensorArrayMap,t.tensorListMap)}case"While":case"StatelessWhile":{let o=I("body",r,e,t),n=I("cond",r,e,t),s=I("args",r,e,t),a=await t.functionMap[n].executeFunctionAsync(s,t.tensorArrayMap,t.tensorListMap),i=s.map(c=>c.id),p=await a[0].data();a.forEach(c=>{!c.kept&&i.indexOf(c.id)===-1&&c.dispose()});let u=s;for(;p[0];){let c=u;u=await t.functionMap[o].executeFunctionAsync(u,t.tensorArrayMap,t.tensorListMap);let l=u.map(d=>d.id);c.forEach(d=>{!d.kept&&i.indexOf(d.id)===-1&&l.indexOf(d.id)===-1&&d.dispose()});let m=await t.functionMap[n].executeFunctionAsync(u,t.tensorArrayMap,t.tensorListMap);p=await m[0].data(),m.forEach(d=>{!d.kept&&i.indexOf(d.id)===-1&&l.indexOf(d.id)===-1&&d.dispose()})}return u}case"LoopCond":{let o=I("pred",r,e,t);return[is(o)]}case"Switch":{let o=I("pred",r,e,t),n=I("data",r,e,t);return n.kept||(n=is(n)),(await o.data())[0]?[void 0,n]:[n,void 0]}case"Merge":{let o=r.inputNames.find(n=>Gt(n,e,t)!==void 0);if(o){let n=Gt(o,e,t);return[is(n)]}return}case"Enter":{let o=I("frameName",r,e,t),n=I("tensor",r,e,t);return t.enterFrame(o),[is(n)]}case"Exit":{let o=I("tensor",r,e,t);return t.exitFrame(),[is(o)]}case"NextIteration":{let o=I("tensor",r,e,t);return t.nextIteration(),[is(o)]}case"TensorArrayV3":{let 
o=I("size",r,e,t),n=I("dtype",r,e,t),s=I("elementShape",r,e,t),a=I("dynamicSize",r,e,t),i=I("clearAfterRead",r,e,t),p=I("identicalElementShapes",r,e,t),u=I("name",r,e,t),c=new nf(u,n,o,s,p,a,i);return t.addTensorArray(c),[c.idTensor,Ce(1)]}case"TensorArrayWriteV3":{let o=I("tensorArrayId",r,e,t),n=I("index",r,e,t),s=I("tensor",r,e,t),a=t.getTensorArray(o.id);return a.write(n,s),[a.idTensor]}case"TensorArrayReadV3":{let o=I("tensorArrayId",r,e,t),n=I("index",r,e,t);return[t.getTensorArray(o.id).read(n)]}case"TensorArrayGatherV3":{let o=I("tensorArrayId",r,e,t),n=I("indices",r,e,t),s=I("dtype",r,e,t);return[t.getTensorArray(o.id).gather(n,s)]}case"TensorArrayScatterV3":{let o=I("tensorArrayId",r,e,t),n=I("indices",r,e,t),s=I("tensor",r,e,t),a=t.getTensorArray(o.id);return a.scatter(n,s),[a.idTensor]}case"TensorArrayConcatV3":{let o=I("tensorArrayId",r,e,t),n=t.getTensorArray(o.id),s=I("dtype",r,e,t);return[n.concat(s)]}case"TensorArraySplitV3":{let o=I("tensorArrayId",r,e,t),n=I("tensor",r,e,t),s=I("lengths",r,e,t),a=t.getTensorArray(o.id);return a.split(s,n),[a.idTensor]}case"TensorArraySizeV3":{let o=I("tensorArrayId",r,e,t),n=t.getTensorArray(o.id);return[Ce(n.size(),"int32")]}case"TensorArrayCloseV3":{let o=I("tensorArrayId",r,e,t),n=t.getTensorArray(o.id);return n.clearAndClose(),[n.idTensor]}case"TensorListSetItem":{let o=I("tensorListId",r,e,t),n=I("index",r,e,t),s=I("tensor",r,e,t),a=t.getTensorList(o.id);return a.setItem(n,s),[a.idTensor]}case"TensorListGetItem":{let o=I("tensorListId",r,e,t),n=I("index",r,e,t),s=I("elementShape",r,e,t),a=I("elementDType",r,e,t);return[t.getTensorList(o.id).getItem(n,s,a)]}case"TensorListScatterV2":case"TensorListScatter":{let o=I("indices",r,e,t),n=I("tensor",r,e,t),s=I("elementShape",r,e,t),a=I("numElements",r,e,t),i=BN(n,o,s,a);return t.addTensorList(i),[i.idTensor]}case"TensorListReserve":case"EmptyTensorList":{let 
o=I("elementShape",r,e,t),n=I("elementDType",r,e,t),s;r.op==="TensorListReserve"?s="numElements":s="maxNumElements";let a=I(s,r,e,t),i=r.op==="TensorListReserve"?-1:a,p=LN(o,n,a,i);return t.addTensorList(p),[p.idTensor]}case"TensorListGather":{let o=I("tensorListId",r,e,t),n=I("indices",r,e,t),s=I("elementShape",r,e,t),a=I("elemen
2022-11-18 17:13:29 +01:00
============================
Hi, looks like you are running TensorFlow.js in Node.js. To speed things up dramatically, install our node backend, visit https://github.com/tensorflow/tfjs-node for more details.
2023-01-06 19:23:06 +01:00
============================`));let n={id:this.nextDataId()};return this.data.set(n,{values:e,dtype:o,refCount:1}),n}makeTensorInfo(e,t,o){let n;if(t==="string"&&o!=null&&o.length>0&&y.isString(o[0])){let s=o.map(a=>y.encodeString(a));n=this.write(s,e,t)}else n=this.write(o,e,t);return{dataId:n,shape:e,dtype:t}}refCount(e){return this.data.has(e)?this.data.get(e).refCount:0}incRef(e){let t=this.data.get(e);t.refCount++}decRef(e){if(this.data.has(e)){let t=this.data.get(e);t.refCount--}}move(e,t,o,n,s){this.data.set(e,{values:t,dtype:n,refCount:s})}numDataIds(){return this.data.numDataIds()}async read(e){return this.readSync(e)}readSync(e){let{dtype:t,complexTensorInfos:o}=this.data.get(e);if(t==="complex64"){let n=this.readSync(o.real.dataId),s=this.readSync(o.imag.dataId);return S.mergeRealAndImagArrays(n,s)}return y.convertBackendValuesAndArrayBuffer(this.data.get(e).values,t)}bufferSync(e){let t=this.readSync(e.dataId);if(e.dtype==="string")try{let o=t.map(n=>y.decodeString(n));return le(e.shape,e.dtype,o)}catch(o){throw new Error("Failed to decode encoded string bytes into utf-8")}return le(e.shape,e.dtype,t)}makeOutput(e,t,o){return sr().makeTensorFromTensorInfo(this.makeTensorInfo(t,o,e),this)}disposeData(e,t=!1){if(this.data.has(e)){if(this.data.get(e).refCount--,!t&&this.data.get(e).refCount>0)return!1;let{complexTensorInfos:o}=this.data.get(e);o!=null&&(this.disposeData(o.real.dataId,!0),this.disposeData(o.imag.dataId,!0)),this.data.delete(e)}return!0}disposeIntermediateTensorInfo(e){this.disposeData(e.dataId)}async time(e){let t=y.now();return e(),{kernelMs:y.now()-t}}memory(){return{unreliable:!0,reasons:["The reported memory is an upper bound. 
Due to automatic garbage collection, the true allocated memory may be less."]}}where(e){j([e],"where");let t=this.readSync(e.dataId);return O6(e.shape,t)}dispose(){}floatPrecision(){return 32}epsilon(){return super.epsilon()}};Pi.nextDataId=0;var Zp={};Ge(Zp,{addImpl:()=>uS,bincountImpl:()=>jp,bincountReduceImpl:()=>uf,castImpl:()=>iS,ceilImpl:()=>pS,concatImpl:()=>Nu,equalImpl:()=>cS,expImpl:()=>mS,expm1Impl:()=>fS,floorImpl:()=>hS,gatherNdImpl:()=>pf,gatherV2Impl:()=>cf,greaterEqualImpl:()=>xS,greaterImpl:()=>gS,lessEqualImpl:()=>bS,lessImpl:()=>yS,linSpaceImpl:()=>lf,logImpl:()=>CS,maxImpl:()=>mf,maximumImpl:()=>SS,minimumImpl:()=>wS,multiplyImpl:()=>hl,negImpl:()=>IS,notEqualImpl:()=>vS,prodImpl:()=>kS,raggedGatherImpl:()=>df,raggedRangeImpl:()=>ff,raggedTensorToTensorImpl:()=>hf,rangeImpl:()=>_u,rsqrtImpl:()=>NS,scatterImpl:()=>Va,sigmoidImpl:()=>OT,simpleAbsImpl:()=>aS,sliceImpl:()=>$u,sparseFillEmptyRowsImpl:()=>gf,sparseReshapeImpl:()=>xf,sparseSegmentReductionImpl:()=>Qp,sqrtImpl:()=>LT,squaredDifferenceImpl:()=>_S,stridedSliceImpl:()=>yf,stringNGramsImpl:()=>Eu,stringSplitImpl:()=>Au,stringToHashBucketFastImpl:()=>Ru,subImpl:()=>ES,tileImpl:()=>bf,topKImpl:()=>Cf,transposeImpl:()=>Xp,uniqueImpl:()=>Sf});function aS(r){let e=new Float32Array(r.length);for(let t=0;t<r.length;++t)e[t]=Math.abs(r[t]);return e}var P6=r=>{let{x:e}=r.inputs,t=r.backend;j(e,"abs");let o=new Float32Array(y.sizeFromShape(e.shape)),n=t.data.get(e.dataId).values;return o=aS(n),t.makeOutput(o,e.shape,e.dtype)},iT={kernelName:ys,backendName:"cpu",kernelFunc:P6};function Be(r){return(e,t,o,n,s)=>{let a=S.assertAndGetBroadcastShape(e,t),i=a.length,p=y.computeStrides(a),u=y.sizeFromShape(a),c=y.getTypedArrayFromDType(s,u),l=e.length,m=t.length,d=y.computeStrides(e),f=y.computeStrides(t),h=S.getBroadcastDims(e,a),g=S.getBroadcastDims(t,a);if(h.length+g.length===0)for(let x=0;x<c.length;++x)c[x]=r(o[x%o.length],n[x%n.length]);else for(let x=0;x<c.length;++x){let 
b=y.indexToLoc(x,i,p),C=b.slice(-l);h.forEach(E=>C[E]=0);let w=y.locToIndex(C,l,d),k=b.slice(-m);g.forEach(E=>k[E]=0);let _=y.locToIndex(k,m,f);c[x]=r(o[w],n[_])}return[c,a]}}function Ht(r){let{inputs:e,backend:t}=r,{real:o,imag:n}=e,s=t.data.get(o.dataId).values,a=t.data.get(n.dataId).values,i=t.makeTensorInfo(o.shape,"complex64"),p=t.data.get(i.dataId);return p.complexTensorInfos={real:t.makeTensorInfo(o.shape,"float32",s),imag:t.make
2022-11-18 17:13:29 +01:00
${s.shape}`);if(o.shape.length!==2)throw new Error(`Indices must be a matrix, saw:
${o.shape}`);if(n.shape.length!==1)throw new Error(`Values must be a vector, saw:
${n.shape}`);if(a.shape.length!==0)throw new Error(`Default value must be a scalar, saw:
2023-01-06 19:23:06 +01:00
${a.shape}`);let i=t.data.get(o.dataId).values,p=t.data.get(n.dataId).values,u=t.data.get(s.dataId).values,c=t.data.get(a.dataId).values[0],[l,m,d,f,h]=gf(i,o.shape,o.dtype,p,n.dtype,u,c);return[t.makeTensorInfo(m,o.dtype,l),t.makeTensorInfo([m[0]],n.dtype,d),t.makeTensorInfo([f.length],"bool",new Uint8Array(f.map(g=>Number(g)))),t.makeTensorInfo([h.length],o.dtype,new Int32Array(h))]}var n$={kernelName:hi,backendName:"cpu",kernelFunc:S5};function w5(r){let{inputs:e,backend:t}=r,{inputIndices:o,inputShape:n,newShape:s}=e;if(o.shape.length!==2)throw new Error(`Input indices should be a matrix but received shape
2022-11-18 17:13:29 +01:00
${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape
2023-01-06 19:23:06 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=Array.from(t.data.get(n.dataId).values),i=t.data.get(o.dataId).values,p=Array.from(t.data.get(s.dataId).values),[u,c,l]=xf(i,o.shape,o.dtype,a,p);return[t.makeTensorInfo(c,o.dtype,u),t.makeTensorInfo([l.length],s.dtype,new Int32Array(l))]}var s$={kernelName:va,backendName:"cpu",kernelFunc:w5};function I5(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
2023-01-06 19:23:06 +01:00
${s.shape}`);if(n.shape[0]!==s.shape[0])throw new Error("segmentIds and indices should have same size.");let a=t.data.get(o.dataId).values,i=t.data.get(n.dataId).values,p=t.data.get(s.dataId).values,[u,c]=Qp(a,o.shape,o.dtype,i,p,!0);return t.makeTensorInfo(c,o.dtype,u)}var a$={kernelName:gi,backendName:"cpu",kernelFunc:I5};function v5(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
2023-03-07 00:15:42 +01:00
${s.shape}`);if(n.shape[0]!==s.shape[0])throw new Error("segmentIds and indices should have same size.");let a=t.data.get(o.dataId).values,i=t.data.get(n.dataId).values,p=t.data.get(s.dataId).values,[u,c]=Qp(a,o.shape,o.dtype,i,p);return t.makeTensorInfo(c,o.dtype,u)}var i$={kernelName:xi,backendName:"cpu",kernelFunc:v5};function k5(r){let{inputs:e,backend:t,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=e,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=S.calculateShapes(s,n,i),d=!1,f=t.bufferSync(n),h;switch(s.dtype){case"bool":{let g=t.bufferSync(s),x=!!t.data.get(a.dataId).values[0];h=Va(f,g,i,m,c,u,p,l,x,d);break}case"float32":{let g=t.bufferSync(s),x=t.data.get(a.dataId).values[0];h=Va(f,g,i,m,c,u,p,l,x,d);break}case"int32":{let g=t.bufferSync(s),x=t.data.get(a.dataId).values[0];h=Va(f,g,i,m,c,u,p,l,x,d);break}case"string":{let g=t.bufferSync(s),x=y.decodeString(t.data.get(a.dataId).values[0]);h=Va(f,g,i,m,c,u,p,l,x,d);break}default:throw new Error(`Unsupported type ${s.dtype}`)}return t.makeTensorInfo(i,h.dtype,h.values)}var u$={kernelName:yi,backendName:"cpu",kernelFunc:k5};function N5(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=o,i=y.parseAxisParam(a,n.shape)[0],p=S.prepareSplitSize(n,s,i),u=new Array(n.shape.length).fill(0),c=n.shape.slice();return p.map(l=>{let m=[...c];m[i]=l;let d=To({inputs:{x:n},backend:t,attrs:{begin:u,size:m}});return u[i]+=l,d})}var p$={kernelName:Rs,backendName:"cpu",kernelFunc:N5};var c$={kernelName:bi,backendName:"cpu",kernelFunc:({inputs:r,backend:e})=>{let{x:t}=r,o=e;j(t,"square");let n=o.data.get(t.dataId).values,s=new Float32Array(n.length);for(let i=0;i<n.length;++i){let p=n[i];s[i]=p*p}return{dataId:o.write(s,t.shape,t.dtype),shape:t.shape,dtype:t.dtype}}};var T5=we(fo,(r,e)=>{let t=e;return isNaN(r)?NaN:r>0?1:t.alpha}),l$={kernelName:fo,backendName:"cpu",kernelFunc:T5};function 
_5(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o;j(n,"stridedSlice");let{finalShapeSparse:d,finalShape:f,isIdentity:h,sliceDim0:g,isSimpleSlice:x,begin:b,end:C,strides:w}=ut.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=Le({inputs:{x:n},backend:t,attrs:{shape:f}});else if(g||x){y.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let _=ut.computeOutShape(b,C,w),E=To({inputs:{x:n},backend:t,attrs:{begin:b,size:_}});k=Le({inputs:{x:E},backend:t,attrs:{shape:f}}),t.disposeIntermediateTensorInfo(E)}else{let _=t.bufferSync(n),E=yf(d,_,w,b);k=t.makeTensorInfo(f,E.dtype,E.values)}return k}var m$={kernelName:Xn,backendName:"cpu",kernelFunc:_5};function $5(r){let{inputs:e,backend:t,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=e,m=t.data.get(c.dataId).values,d=t.data.get(l.dataId).values,[f,h]=Eu(m,d,n,s,a,i,p,u);return[t.makeTensorInfo([f.length],"string",f),t.makeTensorInfo(l.shape,"int32",h)]}var d$={kernelName:Ds,backendName:"cpu",kernelFunc:$5};function E5(r){let{inputs:e,backend:t,attrs:o}=r,{skipEmpty:n}=o,{input:s,delimiter:a}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(s.shape.length!==1)throw new Error(`Input must be a vector, got shape: ${s.shape}`);if(a.shape.length!==0)throw new Error(`Delimiter must be a scalar, got shape: ${a.shape}`);let i=t.data.get(s.dataId).values,p=t.data.get(a.dataId).values[0],[u,c,l]=Au(i,p,n),m=c.length;return[t.makeTensorInfo([m,2],"int32",u),t.makeTensorInfo([m],"string",c),t.makeTensorInfo([2],"int32",new Int32Array(l))]}var f$={kernelName:Ci,backendName:"cpu",kernelFunc:E5};function A5(r){let{inputs:e,backend:t,attrs:o}=r,{numBuckets:n}=o,{input:s}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(n<=0)throw new Error("Number of buckets must be at least 1");let 
a=t.data.get(s.dataId).values,i=Ru(a,n);return t.makeTensorInfo(s.shape,"int32",i)}var h$={kernelName:Si,backendName:"cpu",kernelFunc:A5};var R5=we(Qn,r=>Math.tan(r)),
2022-11-20 22:20:02 +01:00
`),s=n.length.toString().length+2,a=n.map((l,m)=>y.rightPad((m+1).toString(),s)+l),i=0;for(let l=0;l<a.length;l++)i=Math.max(a[l].length,i);let p=a.slice(0,o-1),u=a.slice(o-1,o),c=a.slice(o);console.log(p.join(`
2022-11-18 17:13:29 +01:00
`)),console.log(e.split(`
2022-11-20 22:20:02 +01:00
`)[0]),console.log(`%c ${y.rightPad(u[0],i)}`,"border:1px solid red; background-color:#e3d2d2; color:#a61717"),console.log(c.join(`
2023-01-06 19:23:06 +01:00
`))}function XS(r){return Wa(r,()=>r.createProgram(),"Unable to create WebGLProgram.")}function YS(r,e){if(pe(r,()=>r.linkProgram(e)),!O().get("ENGINE_COMPILE_ONLY")&&r.getProgramParameter(e,r.LINK_STATUS)===!1)throw console.log(r.getProgramInfoLog(e)),new Error("Failed to link vertex and fragment shaders.")}function kl(r,e){if(pe(r,()=>r.validateProgram(e)),r.getProgramParameter(e,r.VALIDATE_STATUS)===!1)throw console.log(r.getProgramInfoLog(e)),new Error("Shader program validation failed.")}function QS(r,e){let t=Wa(r,()=>r.createBuffer(),"Unable to create WebGLBuffer");return pe(r,()=>r.bindBuffer(r.ARRAY_BUFFER,t)),pe(r,()=>r.bufferData(r.ARRAY_BUFFER,e,r.STATIC_DRAW)),t}function ZS(r,e){let t=Wa(r,()=>r.createBuffer(),"Unable to create WebGLBuffer");return pe(r,()=>r.bindBuffer(r.ELEMENT_ARRAY_BUFFER,t)),pe(r,()=>r.bufferData(r.ELEMENT_ARRAY_BUFFER,e,r.STATIC_DRAW)),t}function J5(){return O().getNumber("WEBGL_VERSION")===2?1:4}function JS(r){return Wa(r,()=>r.createTexture(),"Unable to create WebGLTexture.")}function ew(r,e){let t=O().getNumber("WEBGL_MAX_TEXTURE_SIZE");if(r<=0||e<=0){let o=`[${r}x${e}]`;throw new Error("Requested texture size "+o+" is invalid.")}if(r>t||e>t){let o=`[${r}x${e}]`,n=`[${t}x${t}]`;throw new Error("Requested texture size "+o+" greater than WebGL maximum on this browser / GPU "+n+".")}}function tw(r){return Wa(r,()=>r.createFramebuffer(),"Unable to create WebGLFramebuffer.")}function Af(r,e,t,o,n,s,a){let i=r.getAttribLocation(e,t);return i===-1?!1:(pe(r,()=>r.bindBuffer(r.ARRAY_BUFFER,o)),pe(r,()=>r.vertexAttribPointer(i,n,r.FLOAT,!1,s,a)),pe(r,()=>r.enableVertexAttribArray(i)),!0)}function _$(r,e,t){E$(r,t),pe(r,()=>r.activeTexture(r.TEXTURE0+t)),pe(r,()=>r.bindTexture(r.TEXTURE_2D,e))}function e8(r,e){E$(r,e),pe(r,()=>r.activeTexture(r.TEXTURE0+e)),pe(r,()=>r.bindTexture(r.TEXTURE_2D,null))}function rw(r,e,t){return Wa(r,()=>r.getUniformLocation(e,t),'uniform "'+t+'" not present in program.')}function ow(r,e,t){return 
r.getUniformLocation(e,t)}function nw(r,e,t,o){pe(r,()=>_$(r,e,o)),pe(r,()=>r.uniform1i(t,o))}function t8(r){pe(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,null)),pe(r,()=>r.viewport(0,0,r.canvas.width,r.canvas.height)),pe(r,()=>r.scissor(0,0,r.canvas.width,r.canvas.height))}function Nl(r,e,t){pe(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,t)),pe(r,()=>r.framebufferTexture2D(r.FRAMEBUFFER,r.COLOR_ATTACHMENT0,r.TEXTURE_2D,e,0))}function Rf(r,e){pe(r,()=>r.bindFramebuffer(r.FRAMEBUFFER,e)),pe(r,()=>r.framebufferTexture2D(r.FRAMEBUFFER,r.COLOR_ATTACHMENT0,r.TEXTURE_2D,null,0))}function rc(r){let e=r.checkFramebufferStatus(r.FRAMEBUFFER);if(e!==r.FRAMEBUFFER_COMPLETE)throw new Error("Error binding framebuffer: "+$$(r,e))}function $$(r,e){switch(e){case r.FRAMEBUFFER_INCOMPLETE_ATTACHMENT:return"FRAMEBUFFER_INCOMPLETE_ATTACHMENT";case r.FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:return"FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT";case r.FRAMEBUFFER_INCOMPLETE_DIMENSIONS:return"FRAMEBUFFER_INCOMPLETE_DIMENSIONS";case r.FRAMEBUFFER_UNSUPPORTED:return"FRAMEBUFFER_UNSUPPORTED";default:return`unknown error ${e}`}}function Wa(r,e,t){let o=pe(r,()=>e());if(o==null)throw new Error(t);return o}function E$(r,e){let t=r.MAX_COMBINED_TEXTURE_IMAGE_UNITS-1,o=e+r.TEXTURE0;if(o<r.TEXTURE0||o>t){let n=`[gl.TEXTURE0, gl.TEXTURE${t}]`;throw new Error(`textureUnit must be in ${n}.`)}}function Ua(r,e=2){return y.sizeFromShape(r.slice(0,r.length-e))}function Ga(r){if(r.length===0)throw Error("Cannot get rows and columns of an empty shape array.");return[r.length>1?r[r.length-2]:1,r[r.length-1]]}function oc(r){let e=[1,1,1];return r.length===0||r.length===1&&r[0]===1||(e=[Ua(r),...Ga(r)]),e}function sw(r,e=!1){let 
t=O().getNumber("WEBGL_MAX_TEXTURE_SIZE"),o=O().getNumber("WEBGL_MAX_SIZE_FOR_NARROW_TEXTURE");o===1/0&&O().getBool("WEBGL_AUTO_SQUARIFY_NARROW_TEXTURE_SHAPE")&&(o=t/2),e&&(t=t*2,o=o*2,r=r.map((i,p)=>p>=r.length-2?y.nearestLargerEven(r[p]):r[p]),r.length===1&&(r=[2,r[0]])),r.length!==2&&(r=y.squeezeShape(r).newShape);let n=y.sizeFromShape(r),s=null;r.length<=1&&n<=t?s=[1,n]:r.length===2&&r[0]<=t&&r[1]
2022-11-18 17:13:29 +01:00
bool isnan_custom(float val) {
uint floatToUint = floatBitsToUint(val);
return (floatToUint & 0x7fffffffu) > 0x7f800000u;
}
bvec4 isnan_custom(vec4 val) {
return bvec4(isnan_custom(val.x),
isnan_custom(val.y), isnan_custom(val.z), isnan_custom(val.w));
}
#define isnan(value) isnan_custom(value)
`:"",p="",u=`
#define round(value) newRound(value)
int newRound(float value) {
return int(floor(value + 0.5));
}
ivec4 newRound(vec4 value) {
return ivec4(floor(value + vec4(0.5)));
}
`):(r="",e="attribute",t="varying",o="varying",n="texture2D",s="gl_FragColor",a="",i=`
#define isnan(value) isnan_custom(value)
bool isnan_custom(float val) {
return (val > 0. || val < 1. || val == 0.) ? false : true;
}
bvec4 isnan_custom(vec4 val) {
return bvec4(isnan(val.x), isnan(val.y), isnan(val.z), isnan(val.w));
}
`,p=`
uniform float INFINITY;
bool isinf(float val) {
return abs(val) == INFINITY;
}
bvec4 isinf(vec4 val) {
return equal(abs(val), vec4(INFINITY));
}
`,u=`
int round(float value) {
return int(floor(value + 0.5));
}
ivec4 round(vec4 value) {
return ivec4(floor(value + vec4(0.5)));
}
2023-01-06 19:23:06 +01:00
`),{version:r,attribute:e,varyingVs:t,varyingFs:o,texture2D:n,output:s,defineOutput:a,defineSpecialNaN:i,defineSpecialInf:p,defineRound:u}}function ps(r,e,t="index"){let o=y.computeStrides(e);return o.map((n,s)=>{let a=`int ${r[s]} = ${t} / ${n}`,i=s===o.length-1?`int ${r[s+1]} = ${t} - ${r[s]} * ${n}`:`index -= ${r[s]} * ${n}`;return`${a}; ${i};`}).join("")}function Pu(r,e,t="index"){let o=y.computeStrides(e);return o.map((n,s)=>{let a=`int ${r[s]} = ${t} / outShapeStrides[${s}]`,i=s===o.length-1?`int ${r[s+1]} = ${t} - ${r[s]} * outShapeStrides[${s}]`:`index -= ${r[s]} * outShapeStrides[${s}]`;return`${a}; ${i};`}).join("")}function s8(r,e){let t=r.length,o=r.map(s=>`${e}[${s}]`),n=new Array(t-1);n[t-2]=o[t-1];for(let s=t-3;s>=0;--s)n[s]=`(${n[s+1]} * ${o[s+1]})`;return n}function A$(r,e,t="index"){let o=r.map((s,a)=>a),n=s8(o,e);return n.map((s,a)=>{let i=`int ${r[a]} = ${t} / ${n[a]}`,p=a===n.length-1?`int ${r[a+1]} = ${t} - ${r[a]} * ${n[a]}`:`index -= ${r[a]} * ${n[a]}`;return`${i}; ${p};`}).join("")}function sc(r){let e=y.computeStrides(r).map(t=>t.toString());return`
2022-11-18 17:13:29 +01:00
int getFlatIndex(ivec3 coords) {
return coords.x * ${e[0]} + coords.y * ${e[1]} + coords.z;
}
2023-01-06 19:23:06 +01:00
`}function ac(){return`
2022-11-18 17:13:29 +01:00
int getFlatIndex(ivec3 coords) {
return coords.x * outShapeStrides[0] + coords.y * outShapeStrides[1] + coords.z;
}
2023-01-06 19:23:06 +01:00
`}var Ff=`
2022-11-18 17:13:29 +01:00
const float FLOAT_MAX = 1.70141184e38;
const float FLOAT_MIN = 1.17549435e-38;
lowp vec4 encode_float(highp float v) {
if (isnan(v)) {
return vec4(255, 255, 255, 255);
}
highp float av = abs(v);
if(av < FLOAT_MIN) {
return vec4(0.0, 0.0, 0.0, 0.0);
} else if(v > FLOAT_MAX) {
return vec4(0.0, 0.0, 128.0, 127.0) / 255.0;
} else if(v < -FLOAT_MAX) {
return vec4(0.0, 0.0, 128.0, 255.0) / 255.0;
}
highp vec4 c = vec4(0,0,0,0);
highp float e = floor(log2(av));
highp float m = exp2(fract(log2(av))) - 1.0;
c[2] = floor(128.0 * m);
m -= c[2] / 128.0;
c[1] = floor(32768.0 * m);
m -= c[1] / 32768.0;
c[0] = floor(8388608.0 * m);
highp float ebias = e + 127.0;
c[3] = floor(ebias / 2.0);
ebias -= c[3] * 2.0;
c[2] += floor(ebias) * 128.0;
c[3] += 128.0 * step(0.0, -v);
return c / 255.0;
}
2023-01-06 19:23:06 +01:00
`;var{getBroadcastDims:R$}=S;function D$(r,e,t){let o=[];if(r.forEach(d=>{let f=y.sizeFromShape(d.shapeInfo.logicalShape);if(d.shapeInfo.isUniform?o.push(`uniform float ${d.name}${f>1?`[${f}]`:""};`):(o.push(`uniform sampler2D ${d.name};`),o.push(`uniform int offset${d.name};`)),t.enableShapeUniforms){let{uniformShape:h}=Of(t.packedInputs,d.shapeInfo.logicalShape,d.shapeInfo.texShape);switch(h.length){case 1:o.push(`uniform int ${d.name}Shape;`);break;case 2:o.push(`uniform ivec2 ${d.name}Shape;`);break;case 3:o.push(`uniform ivec3 ${d.name}Shape;`);break;case 4:o.push(`uniform ivec4 ${d.name}Shape;`);break;default:break}o.push(`uniform ivec2 ${d.name}TexShape;`)}}),t.enableShapeUniforms){switch(e.logicalShape.length){case 1:o.push("uniform int outShape;");break;case 2:o.push("uniform ivec2 outShape;"),o.push("uniform int outShapeStrides;");break;case 3:o.push("uniform ivec3 outShape;"),o.push("uniform ivec2 outShapeStrides;");break;case 4:o.push("uniform ivec4 outShape;"),o.push("uniform ivec3 outShapeStrides;");break;default:break}o.push("uniform ivec2 outTexShape;")}t.customUniforms&&t.customUniforms.forEach(d=>{o.push(`uniform ${d.type} ${d.name}${d.arrayIndex?`[${d.arrayIndex}]`:""};`)});let n=o.join(`
`),s=r.map(d=>a8(d,e,t.packedInputs,t.enableShapeUniforms)).join(`
`),a=e.texShape,i=wt(),p=p8(i),u,c,l=m8(i);return e.isPacked?(u=i8(e.logicalShape,a,t.enableShapeUniforms),c=l8(i)):(u=u8(e.logicalShape,a,t.enableShapeUniforms),c=c8(i)),t.packedInputs&&(l+=g8),[l,p,c,n,u,s,t.userCode].join(`
`)}function uc(r,e=!1){let t=r.shapeInfo.logicalShape;switch(t.length){case 0:return _8(r,e);case 1:return E8(r,e);case 2:return R8(r,e);case 3:return F8(r,e);case 4:return P8(r,e);case 5:return M8(r);case 6:return L8(r);default:throw new Error(`${t.length}-D input sampling is not yet supported`)}}function F$(r,e){switch(r.shapeInfo.logicalShape.length){case 0:return T8(r);case 1:return $8(r,e);case 2:return A8(r,e);case 3:return D8(r,e);default:return O8(r,e)}}function a8(r,e,t=!1,o){let n="";t?n+=F$(r,o):n+=uc(r,o);let s=r.shapeInfo.logicalShape,a=e.logicalShape;return s.length<=a.length&&(t?n+=B8(r,e):n+=V8(r,e)),n}function i8(r,e,t){switch(r.length){case 0:return O$();case 1:return x8(r,e,t);case 2:return k8(r,e,t);case 3:return b8(r,e,t);default:return S8(r,e,t)}}function u8(r,e,t){switch(r.length){case 0:return O$();case 1:return y8(r,e,t);case 2:return N8(r,e,t);case 3:return C8(r,e,t);case 4:return w8(r,e,t);case 5:return I8(r,e);case 6:return v8(r,e);default:throw new Error(`${r.length}-D output sampling is not yet supported`)}}function p8(r){return`
2022-11-18 17:13:29 +01:00
float sampleTexture(sampler2D textureSampler, vec2 uv) {
return ${r.texture2D}(textureSampler, uv).r;
}
2023-01-06 19:23:06 +01:00
`}function c8(r){return`
2022-11-18 17:13:29 +01:00
void setOutput(float val) {
${r.output} = vec4(val, 0, 0, 0);
}
2023-01-06 19:23:06 +01:00
`}function l8(r){return`
2022-11-18 17:13:29 +01:00
void setOutput(vec4 val) {
${r.output} = val;
}
2023-01-06 19:23:06 +01:00
`}function m8(r){return`${r.version}
2022-11-18 17:13:29 +01:00
precision highp float;
precision highp int;
precision highp sampler2D;
${r.varyingFs} vec2 resultUV;
${r.defineOutput}
const vec2 halfCR = vec2(0.5, 0.5);
struct ivec5
{
int x;
int y;
int z;
int w;
int u;
};
struct ivec6
{
int x;
int y;
int z;
int w;
int u;
int v;
};
uniform float NAN;
${r.defineSpecialNaN}
${r.defineSpecialInf}
${r.defineRound}
int imod(int x, int y) {
return x - y * (x / y);
}
int idiv(int a, int b, float sign) {
int res = a / b;
int mod = imod(a, b);
if (sign < 0. && mod != 0) {
res -= 1;
}
return res;
}
//Based on the work of Dave Hoskins
//https://www.shadertoy.com/view/4djSRW
#define HASHSCALE1 443.8975
float random(float seed){
vec2 p = resultUV * seed;
vec3 p3 = fract(vec3(p.xyx) * HASHSCALE1);
p3 += dot(p3, p3.yzx + 19.19);
return fract((p3.x + p3.y) * p3.z);
}
2023-01-06 19:23:06 +01:00
${d8}
${f8}
${h8}
`}var d8=`
2022-11-18 17:13:29 +01:00
vec2 uvFromFlat(int texNumR, int texNumC, int index) {
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 packedUVfrom1D(int texNumR, int texNumC, int index) {
int texelIndex = index / 2;
int texR = texelIndex / texNumC;
int texC = texelIndex - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
2023-01-06 19:23:06 +01:00
`,f8=`
2022-11-18 17:13:29 +01:00
vec2 packedUVfrom2D(int texelsInLogicalRow, int texNumR,
int texNumC, int row, int col) {
int texelIndex = (row / 2) * texelsInLogicalRow + (col / 2);
int texR = texelIndex / texNumC;
int texC = texelIndex - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
2023-01-06 19:23:06 +01:00
`,h8=`
2022-11-18 17:13:29 +01:00
vec2 packedUVfrom3D(int texNumR, int texNumC,
int texelsInBatch, int texelsInLogicalRow, int b,
int row, int col) {
int index = b * texelsInBatch + (row / 2) * texelsInLogicalRow + (col / 2);
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
2023-01-06 19:23:06 +01:00
`,g8=`
2022-11-18 17:13:29 +01:00
float getChannel(vec4 frag, vec2 innerDims) {
vec2 modCoord = mod(innerDims, 2.);
return modCoord.x == 0. ?
(modCoord.y == 0. ? frag.r : frag.g) :
(modCoord.y == 0. ? frag.b : frag.a);
}
float getChannel(vec4 frag, int dim) {
float modCoord = mod(float(dim), 2.);
return modCoord == 0. ? frag.r : frag.g;
}
2023-01-06 19:23:06 +01:00
`;function O$(){return`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return 0;
}
2023-01-06 19:23:06 +01:00
`}function x8(r,e,t){let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)];return o[0]===1?t?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return 2 * int(resultUV.x * ceil(float(outTexShape[1]) / 2.0));
}
`:`
int getOutputCoords() {
return 2 * int(resultUV.x * ${o[1]}.0);
}
`:o[1]===1?t?`
int getOutputCoords() {
return 2 * int(resultUV.y * ceil(float(outTexShape[0]) / 2.0));
}
`:`
int getOutputCoords() {
return 2 * int(resultUV.y * ${o[0]}.0);
}
`:t?`
int getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
return 2 * (resTexRC.x * packedTexShape[1] + resTexRC.y);
}
`:`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
return 2 * (resTexRC.x * ${o[1]} + resTexRC.y);
}
2023-01-06 19:23:06 +01:00
`}function y8(r,e,t){return e[0]===1?t?`
2022-11-18 17:13:29 +01:00
int getOutputCoords() {
return int(resultUV.x * float(outTexShape[1]));
}
`:`
int getOutputCoords() {
return int(resultUV.x * ${e[1]}.0);
}
`:e[1]===1?t?`
int getOutputCoords() {
return int(resultUV.y * float(outTexShape[0]));
}
`:`
int getOutputCoords() {
return int(resultUV.y * ${e[0]}.0);
}
`:t?`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
return resTexRC.x * outTexShape[1] + resTexRC.y;
}
`:`
int getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
return resTexRC.x * ${e[1]} + resTexRC.y;
}
2023-01-06 19:23:06 +01:00
`}function b8(r,e,t){if(t)return`
2022-11-18 17:13:29 +01:00
ivec3 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
int texelsInLogicalRow = int(ceil(float(outShape[2]) / 2.0));
int texelsInBatch = texelsInLogicalRow * int(ceil(float(outShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int b = index / texelsInBatch;
index -= b * texelsInBatch;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec3(b, r, c);
}
`;let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)],n=Math.ceil(r[2]/2),s=n*Math.ceil(r[1]/2);return`
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
int b = index / ${s};
index -= b * ${s};
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec3(b, r, c);
}
2023-01-06 19:23:06 +01:00
`}function C8(r,e,t){if(t)return`
2022-11-18 17:13:29 +01:00
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
2023-01-06 19:23:06 +01:00
${Pu(["r","c","d"],r)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
2023-01-06 19:23:06 +01:00
`;let o=ps(["r","c","d"],r);return`
2022-11-18 17:13:29 +01:00
ivec3 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${o}
return ivec3(r, c, d);
}
2023-01-06 19:23:06 +01:00
`}function S8(r,e,t){if(t)return`
2022-11-18 17:13:29 +01:00
ivec4 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int texelsInLogicalRow = int(ceil(float(outShape[3]) / 2.0));
int texelsInBatch = texelsInLogicalRow * int(ceil(float(outShape[2]) / 2.0));
int texelsInBatchN = texelsInBatch * outShape[1];
int b2 = index / texelsInBatchN;
index -= b2 * texelsInBatchN;
int b = index / texelsInBatch;
index -= b * texelsInBatch;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec4(b2, b, r, c);
}
`;let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)],n=Math.ceil(r[r.length-1]/2),s=n*Math.ceil(r[r.length-2]/2),a=s,i="",p="b, r, c";for(let u=2;u<r.length-1;u++)a*=r[r.length-u-1],i=`
int b${u} = index / ${a};
index -= b${u} * ${a};
`+i,p=`b${u}, `+p;return`
ivec${r.length} getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
${i}
int b = index / ${s};
index -= b * ${s};
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec${r.length}(${p});
}
2023-01-06 19:23:06 +01:00
`}function w8(r,e,t){if(t)return`
2022-11-18 17:13:29 +01:00
ivec4 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
2023-01-06 19:23:06 +01:00
${Pu(["r","c","d","d2"],r)}
2022-11-18 17:13:29 +01:00
return ivec4(r, c, d, d2);
}
2023-01-06 19:23:06 +01:00
`;let o=ps(["r","c","d","d2"],r);return`
2022-11-18 17:13:29 +01:00
ivec4 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${o}
return ivec4(r, c, d, d2);
}
2023-01-06 19:23:06 +01:00
`}function I8(r,e){let t=ps(["r","c","d","d2","d3"],r);return`
2022-11-18 17:13:29 +01:00
ivec5 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(${e[0]},
${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${t}
ivec5 outShape = ivec5(r, c, d, d2, d3);
return outShape;
}
2023-01-06 19:23:06 +01:00
`}function v8(r,e){let t=ps(["r","c","d","d2","d3","d4"],r);return`
2022-11-18 17:13:29 +01:00
ivec6 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
${t}
ivec6 result = ivec6(r, c, d, d2, d3, d4);
return result;
}
2023-01-06 19:23:06 +01:00
`}function k8(r,e,t){let o=[Math.ceil(e[0]/2),Math.ceil(e[1]/2)];if(y.arraysEqual(r,e))return t?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
return 2 * ivec2(resultUV.yx * vec2(packedTexShape[0], packedTexShape[1]));
}
`:`
ivec2 getOutputCoords() {
return 2 * ivec2(resultUV.yx * vec2(${o[0]}, ${o[1]}));
}
`;let n=Math.ceil(r[1]/2);return t?`
ivec2 getOutputCoords() {
ivec2 packedTexShape = ivec2(ceil(float(outTexShape[0]) / 2.0), ceil(float(outTexShape[1]) / 2.0));
int texelsInLogicalRow = int(ceil(float(outShape[1]) / 2.0));
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(packedTexShape[0], packedTexShape[1]));
int index = resTexRC.x * packedTexShape[1] + resTexRC.y;
int r = 2 * (index / texelsInLogicalRow);
int c = imod(index, texelsInLogicalRow) * 2;
return ivec2(r, c);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${o[0]}, ${o[1]}));
int index = resTexRC.x * ${o[1]} + resTexRC.y;
int r = 2 * (index / ${n});
int c = imod(index, ${n}) * 2;
return ivec2(r, c);
}
2023-01-06 19:23:06 +01:00
`}function N8(r,e,t){return y.arraysEqual(r,e)?t?`
2022-11-18 17:13:29 +01:00
ivec2 getOutputCoords() {
return ivec2(resultUV.yx * vec2(outTexShape[0], outTexShape[1]));
}
`:`
ivec2 getOutputCoords() {
return ivec2(resultUV.yx * vec2(${e[0]}, ${e[1]}));
}
`:r[1]===1?t?`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
return ivec2(index, 0);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
return ivec2(index, 0);
}
`:r[0]===1?t?`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
return ivec2(0, index);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
return ivec2(0, index);
}
`:t?`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(outTexShape[0], outTexShape[1]));
int index = resTexRC.x * outTexShape[1] + resTexRC.y;
int r = index / outShape[1];
int c = index - r * outShape[1];
return ivec2(r, c);
}
`:`
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(${e[0]}, ${e[1]}));
int index = resTexRC.x * ${e[1]} + resTexRC.y;
int r = index / ${r[1]};
int c = index - r * ${r[1]};
return ivec2(r, c);
}
2023-01-06 19:23:06 +01:00
`}function Mu(r){return`offset${r}`}function T8(r){let e=r.name,t="get"+e.charAt(0).toUpperCase()+e.slice(1),o=wt();return`
2022-11-18 17:13:29 +01:00
vec4 ${t}() {
return ${o.texture2D}(${e}, halfCR);
}
2023-01-06 19:23:06 +01:00
`}function _8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1);if(r.shapeInfo.isUniform)return`float ${o}() {return ${t};}`;let[n,s]=r.shapeInfo.texShape;if(n===1&&s===1)return`
2022-11-18 17:13:29 +01:00
float ${o}() {
return sampleTexture(${t}, halfCR);
}
2023-01-06 19:23:06 +01:00
`;let a=Mu(t);if(e)return`
2022-11-18 17:13:29 +01:00
float ${o}() {
vec2 uv = uvFromFlat(${t}TexShape[0], ${t}TexShape[1], ${a});
return sampleTexture(${t}, uv);
}
`;let[i,p]=r.shapeInfo.texShape;return`
float ${o}() {
vec2 uv = uvFromFlat(${i}, ${p}, ${a});
return sampleTexture(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`}function $8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),n=r.shapeInfo.texShape,s=wt();if(e)return`
2022-11-18 17:13:29 +01:00
vec4 ${o}(int index) {
ivec2 packedTexShape = ivec2(ceil(float(${t}TexShape[0]) / 2.0), ceil(float(${t}TexShape[1]) / 2.0));
vec2 uv = packedUVfrom1D(
packedTexShape[0], packedTexShape[1], index);
return ${s.texture2D}(${t}, uv);
}
`;let a=[Math.ceil(n[0]/2),Math.ceil(n[1]/2)];return`
vec4 ${o}(int index) {
vec2 uv = packedUVfrom1D(
${a[0]}, ${a[1]}, index);
return ${s.texture2D}(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`}function E8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1);if(r.shapeInfo.isUniform)return`
2022-11-18 17:13:29 +01:00
float ${o}(int index) {
2023-01-06 19:23:06 +01:00
${pc(r)}
2022-11-18 17:13:29 +01:00
}
`;let n=r.shapeInfo.texShape,s=n[0],a=n[1];if(a===1&&s===1)return`
float ${o}(int index) {
return sampleTexture(${t}, halfCR);
}
2023-01-06 19:23:06 +01:00
`;let i=Mu(t);return a===1?e?`
2022-11-18 17:13:29 +01:00
float ${o}(int index) {
vec2 uv = vec2(0.5, (float(index + ${i}) + 0.5) / float(${t}TexShape[0]));
return sampleTexture(${t}, uv);
}
`:`
float ${o}(int index) {
vec2 uv = vec2(0.5, (float(index + ${i}) + 0.5) / ${s}.0);
return sampleTexture(${t}, uv);
}
`:s===1?e?`
float ${o}(int index) {
vec2 uv = vec2((float(index + ${i}) + 0.5) / float(${t}TexShape[1]), 0.5);
return sampleTexture(${t}, uv);
}
`:`
float ${o}(int index) {
vec2 uv = vec2((float(index + ${i}) + 0.5) / ${a}.0, 0.5);
return sampleTexture(${t}, uv);
}
`:e?`
float ${o}(int index) {
vec2 uv = uvFromFlat(${t}TexShape[0], ${t}TexShape[1], index + ${i});
return sampleTexture(${t}, uv);
}
`:`
float ${o}(int index) {
vec2 uv = uvFromFlat(${s}, ${a}, index + ${i});
return sampleTexture(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`}function A8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape,a=s[0],i=s[1],p=wt();if(s!=null&&y.arraysEqual(t,s))return e?`
2022-11-18 17:13:29 +01:00
vec4 ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return ${p.texture2D}(${o}, uv);
}
`:`
vec4 ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${i}.0, ${a}.0);
return ${p.texture2D}(${o}, uv);
}
`;if(e)return`
vec4 ${n}(int row, int col) {
ivec2 packedTexShape = ivec2(ceil(float(${o}TexShape[0]) / 2.0), ceil(float(${o}TexShape[1]) / 2.0));
int valuesPerRow = int(ceil(float(${o}Shape[1]) / 2.0));
vec2 uv = packedUVfrom2D(valuesPerRow, packedTexShape[0], packedTexShape[1], row, col);
return ${p.texture2D}(${o}, uv);
}
`;let u=[Math.ceil(s[0]/2),Math.ceil(s[1]/2)],c=Math.ceil(t[1]/2);return`
vec4 ${n}(int row, int col) {
vec2 uv = packedUVfrom2D(${c}, ${u[0]}, ${u[1]}, row, col);
return ${p.texture2D}(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`}function R8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape;if(s!=null&&y.arraysEqual(t,s)){if(e)return`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
vec2 uv = (vec2(col, row) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
2022-11-20 22:20:02 +01:00
`;let m=s[0],d=s[1];return`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
2022-11-20 22:20:02 +01:00
vec2 uv = (vec2(col, row) + halfCR) / vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`}let{newShape:a,keptDims:i}=y.squeezeShape(t),p=a;if(p.length<t.length){let m=cc(r,p),d=["row","col"];return`
${uc(m,e)}
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
2023-01-06 19:23:06 +01:00
return ${n}(${lc(d,i)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col) {
int index = round(dot(vec2(row, col), vec2(${t[1]}, 1)));
2023-01-06 19:23:06 +01:00
${pc(r)}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
`;let u=s[0],c=s[1],l=Mu(o);return c===1?e?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${o}Shape[1], 1, 1));
vec2 uv = vec2(0.5, (index + 0.5) / float(${o}TexShape[0]));
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${t[1]}, 1, 1));
vec2 uv = vec2(0.5, (index + 0.5) / ${u}.0);
return sampleTexture(${o}, uv);
}
`:u===1?e?`
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${o}Shape[1], 1, 1));
vec2 uv = vec2((index + 0.5) / float(${o}TexShape[1]), 0.5);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
float index = dot(vec3(row, col, ${l}), vec3(${t[1]}, 1, 1));
vec2 uv = vec2((index + 0.5) / ${c}.0, 0.5);
return sampleTexture(${o}, uv);
}
`:e?`
float ${n}(int row, int col) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${o}Shape[1] + col + ${l};
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${t[1]} + col + ${l};
vec2 uv = uvFromFlat(${u}, ${c}, index);
return sampleTexture(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`}function D8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=r.shapeInfo.texShape,a=[Math.ceil(s[0]/2),Math.ceil(s[1]/2)];if(t[0]===1){let m=t.slice(1),d=[1,2],f=cc(r,m),h=["b","row","col"];return`
${F$(f,e)}
2022-11-18 17:13:29 +01:00
vec4 ${n}(int b, int row, int col) {
2023-01-06 19:23:06 +01:00
return ${n}(${lc(h,d)});
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
`}let i=wt();if(e)return`
2022-11-18 17:13:29 +01:00
vec4 ${n}(int b, int row, int col) {
ivec2 packedTexShape = ivec2(ceil(float(${o}TexShape[0]) / 2.0), ceil(float(${o}TexShape[1]) / 2.0));
int valuesPerRow = int(ceil(float(${o}Shape[2]) / 2.0));
int texelsInBatch = valuesPerRow * int(ceil(float(${o}Shape[1]) / 2.0));
vec2 uv = packedUVfrom3D(
packedTexShape[0], packedTexShape[1], texelsInBatch, valuesPerRow, b, row, col);
return ${i.texture2D}(${o}, uv);
}
`;let p=a[0],u=a[1],c=Math.ceil(t[2]/2),l=c*Math.ceil(t[1]/2);return`
vec4 ${n}(int b, int row, int col) {
vec2 uv = packedUVfrom3D(
${p}, ${u}, ${l}, ${c}, b, row, col);
return ${i.texture2D}(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`}function F8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=t[1]*t[2],a=t[2],{newShape:i,keptDims:p}=y.squeezeShape(t),u=i;if(u.length<t.length){let h=cc(r,u),g=["row","col","depth"];return`
${uc(h,e)}
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
2023-01-06 19:23:06 +01:00
return ${n}(${lc(g,p)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col, int depth) {
int index = round(dot(vec3(row, col, depth),
vec3(${s}, ${a}, 1)));
2023-01-06 19:23:06 +01:00
${pc(r)}
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let c=r.shapeInfo.texShape,l=c[0],m=c[1],d=r.shapeInfo.flatOffset;if(m===s&&d==null)return e?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
int stride1 = ${o}Shape[2];
float texR = float(row);
float texC = dot(vec2(col, depth), vec2(stride1, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
float texR = float(row);
float texC = dot(vec2(col, depth), vec2(${a}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${m}.0, ${l}.0);
return sampleTexture(${o}, uv);
}
2022-11-20 22:20:02 +01:00
`;if(m===a&&d==null)return e?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
float texR = dot(vec2(row, col), vec2(${o}Shape[1], 1));
float texC = float(depth);
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
float texR = dot(vec2(row, col), vec2(${t[1]}, 1));
float texC = float(depth);
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${m}.0, ${l}.0);
return sampleTexture(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`;let f=Mu(o);return e?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth) {
// Explicitly use integer operations as dot() only works on floats.
int stride0 = ${o}Shape[1] * ${o}Shape[2];
int stride1 = ${o}Shape[2];
2022-11-20 22:20:02 +01:00
int index = row * stride0 + col * stride1 + depth + ${f};
2022-11-18 17:13:29 +01:00
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth) {
// Explicitly use integer operations as dot() only works on floats.
2022-11-20 22:20:02 +01:00
int index = row * ${s} + col * ${a} + depth + ${f};
2022-11-18 17:13:29 +01:00
vec2 uv = uvFromFlat(${l}, ${m}, index);
return sampleTexture(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`}function O8(r,e){let t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),n=wt();if(e)return`
2022-11-18 17:13:29 +01:00
vec4 ${o}(int b2, int b, int row, int col) {
int valuesPerRow = int(ceil(float(${t}Shape[3]) / 2.0));
int texelsInBatch = valuesPerRow * int(ceil(float(${t}Shape[2]) / 2.0));
int index = b * texelsInBatch + (row / 2) * valuesPerRow + (col / 2);
texelsInBatch *= ${t}Shape[1];
index = b2 * texelsInBatch + index;
ivec2 packedTexShape = ivec2(ceil(float(${t}TexShape[0]) / 2.0), ceil(float(${t}TexShape[1]) / 2.0));
int texR = index / packedTexShape[1];
int texC = index - texR * packedTexShape[1];
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(packedTexShape[1], packedTexShape[0]); return ${n.texture2D}(${t}, uv);
}
2022-11-20 22:20:02 +01:00
`;let s=r.shapeInfo.logicalShape,a=s.length,i=r.shapeInfo.texShape,p=[Math.ceil(i[0]/2),Math.ceil(i[1]/2)],u=p[0],c=p[1],l=Math.ceil(s[a-1]/2),m=l*Math.ceil(s[a-2]/2),d="int b, int row, int col",f=`b * ${m} + (row / 2) * ${l} + (col / 2)`;for(let h=2;h<a-1;h++)d=`int b${h}, `+d,m*=s[a-h-1],f=`b${h} * ${m} + `+f;return`
vec4 ${o}(${d}) {
int index = ${f};
2022-11-18 17:13:29 +01:00
int texR = index / ${c};
int texC = index - texR * ${c};
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${c}, ${u});
return ${n.texture2D}(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`}function P8(r,e){let t=r.shapeInfo.logicalShape,o=r.name,n="get"+o.charAt(0).toUpperCase()+o.slice(1),s=t[3],a=t[2]*s,i=t[1]*a,{newShape:p,keptDims:u}=y.squeezeShape(t);if(p.length<t.length){let b=cc(r,p),C=["row","col","depth","depth2"];return`
${uc(b,e)}
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
2023-01-06 19:23:06 +01:00
return ${n}(${lc(C,u)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${n}(int row, int col, int depth, int depth2) {
int index = round(dot(vec4(row, col, depth, depth2),
vec4(${i}, ${a}, ${s}, 1)));
2023-01-06 19:23:06 +01:00
${pc(r)}
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let c=r.shapeInfo.flatOffset,l=r.shapeInfo.texShape,m=l[0],d=l[1],f=`int stride2 = ${o}Shape[3];`,h=`int stride1 = ${o}Shape[2] * stride2;`,g=`int stride0 = ${o}Shape[1] * stride1;`;if(d===i&&c==null)return e?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
${h}
float texR = float(row);
float texC =
dot(vec3(col, depth, depth2),
vec3(stride1, stride2, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
float texR = float(row);
float texC =
dot(vec3(col, depth, depth2),
vec3(${a}, ${s}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
2022-11-20 22:20:02 +01:00
`;if(d===s&&c==null)return e?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
float texR = dot(vec3(row, col, depth),
vec3(${o}Shape[1] * ${o}Shape[2], ${o}Shape[2], 1));
float texC = float(depth2);
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${o}TexShape[1], ${o}TexShape[0]);
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
float texR = dot(vec3(row, col, depth),
vec3(${t[1]*t[2]}, ${t[2]}, 1));
float texC = float(depth2);
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`;let x=Mu(o);return e?`
2022-11-18 17:13:29 +01:00
float ${n}(int row, int col, int depth, int depth2) {
// Explicitly use integer operations as dot() only works on floats.
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
${h}
${g}
int index = row * stride0 + col * stride1 +
depth * stride2 + depth2;
2022-11-20 22:20:02 +01:00
vec2 uv = uvFromFlat(${o}TexShape[0], ${o}TexShape[1], index + ${x});
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
`:`
float ${n}(int row, int col, int depth, int depth2) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${i} + col * ${a} +
depth * ${s} + depth2;
2022-11-20 22:20:02 +01:00
vec2 uv = uvFromFlat(${m}, ${d}, index + ${x});
2022-11-18 17:13:29 +01:00
return sampleTexture(${o}, uv);
}
2023-01-06 19:23:06 +01:00
`}function M8(r){let e=r.shapeInfo.logicalShape,t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),n=e[4],s=e[3]*n,a=e[2]*s,i=e[1]*a,{newShape:p,keptDims:u}=y.squeezeShape(e);if(p.length<e.length){let h=cc(r,p),g=["row","col","depth","depth2","depth3"];return`
${uc(h)}
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
2023-01-06 19:23:06 +01:00
return ${o}(${lc(g,u)});
2022-11-18 17:13:29 +01:00
}
`}if(r.shapeInfo.isUniform)return`
float ${o}(int row, int col, int depth, int depth2, int depth3) {
float index = dot(
vec4(row, col, depth, depth2),
vec4(${i}, ${a}, ${s}, ${n})) +
depth3;
2023-01-06 19:23:06 +01:00
${pc(r)}
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let c=r.shapeInfo.flatOffset,l=r.shapeInfo.texShape,m=l[0],d=l[1];if(d===i&&c==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
int texR = row;
float texC = dot(vec4(col, depth, depth2, depth3),
vec4(${a}, ${s}, ${n}, 1));
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${t}, uv);
}
2022-11-20 22:20:02 +01:00
`;if(d===n&&c==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
float texR = dot(
vec4(row, col, depth, depth2),
vec4(${e[1]*e[2]*e[3]},
${e[2]*e[3]}, ${e[3]}, 1));
int texC = depth3;
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${d}.0, ${m}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`;let f=Mu(t);return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth, int depth2, int depth3) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${i} + col * ${a} + depth * ${s} +
2022-11-20 22:20:02 +01:00
depth2 * ${n} + depth3 + ${f};
vec2 uv = uvFromFlat(${m}, ${d}, index);
2022-11-18 17:13:29 +01:00
return sampleTexture(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`}function L8(r){let e=r.shapeInfo.logicalShape,t=r.name,o="get"+t.charAt(0).toUpperCase()+t.slice(1),{newShape:n,keptDims:s}=y.squeezeShape(e);if(n.length<e.length){let g=cc(r,n),x=["row","col","depth","depth2","depth3","depth4"];return`
${uc(g)}
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
2023-01-06 19:23:06 +01:00
return ${o}(${lc(x,s)});
2022-11-18 17:13:29 +01:00
}
`}let a=e[5],i=e[4]*a,p=e[3]*i,u=e[2]*p,c=e[1]*u;if(r.shapeInfo.isUniform)return`
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
int index = round(dot(
vec4(row, col, depth, depth2),
vec4(${c}, ${u}, ${p}, ${i})) +
dot(
vec2(depth3, depth4),
vec2(${a}, 1)));
2023-01-06 19:23:06 +01:00
${pc(r)}
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let l=r.shapeInfo.flatOffset,m=r.shapeInfo.texShape,d=m[0],f=m[1];if(f===c&&l==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
int texR = row;
float texC = dot(vec4(col, depth, depth2, depth3),
vec4(${u}, ${p}, ${i}, ${a})) +
float(depth4);
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${f}.0, ${d}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${t}, uv);
}
2022-11-20 22:20:02 +01:00
`;if(f===a&&l==null)return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
float texR = dot(vec4(row, col, depth, depth2),
vec4(${e[1]*e[2]*e[3]*e[4]},
${e[2]*e[3]*e[4]},
${e[3]*e[4]},
${e[4]})) + float(depth3);
int texC = depth4;
vec2 uv = (vec2(texC, texR) + halfCR) /
2022-11-20 22:20:02 +01:00
vec2(${f}.0, ${d}.0);
2022-11-18 17:13:29 +01:00
return sampleTexture(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`;let h=Mu(t);return`
2022-11-18 17:13:29 +01:00
float ${o}(int row, int col, int depth,
int depth2, int depth3, int depth4) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * ${c} + col * ${u} + depth * ${p} +
depth2 * ${i} + depth3 * ${a} + depth4 + ${h};
2022-11-20 22:20:02 +01:00
vec2 uv = uvFromFlat(${d}, ${f}, index);
2022-11-18 17:13:29 +01:00
return sampleTexture(${t}, uv);
}
2023-01-06 19:23:06 +01:00
`}function pc(r){let e=r.name,t=y.sizeFromShape(r.shapeInfo.logicalShape);return t<2?`return ${e};`:`
2022-11-18 17:13:29 +01:00
for (int i = 0; i < ${t}; i++) {
if (i == index) {
return ${e}[i];
}
}
2023-01-06 19:23:06 +01:00
`}function B8(r,e){let t=r.name,o=t.charAt(0).toUpperCase()+t.slice(1),n="get"+o+"AtOutCoords",s=r.shapeInfo.logicalShape.length,a=e.logicalShape.length,i=R$(r.shapeInfo.logicalShape,e.logicalShape),p=$e(a),u=a-s,c,l=["x","y","z","w","u","v"];s===0?c="":a<2&&i.length>=1?c="coords = 0;":c=i.map(b=>`coords.${l[b+u]} = 0;`).join(`
2022-11-20 22:20:02 +01:00
`);let m="";a<2&&s>0?m="coords":m=r.shapeInfo.logicalShape.map((b,C)=>`coords.${l[C+u]}`).join(", ");let d="return outputValue;",h=y.sizeFromShape(r.shapeInfo.logicalShape)===1,x=y.sizeFromShape(e.logicalShape)===1;if(s===1&&!h&&!x)d=`
2022-11-18 17:13:29 +01:00
return vec4(outputValue.xy, outputValue.xy);
2022-11-20 22:20:02 +01:00
`;else if(h&&!x)a===1?d=`
2022-11-18 17:13:29 +01:00
return vec4(outputValue.x, outputValue.x, 0., 0.);
2022-11-20 22:20:02 +01:00
`:d=`
2022-11-18 17:13:29 +01:00
return vec4(outputValue.x);
2022-11-20 22:20:02 +01:00
`;else if(i.length){let b=s-2,C=s-1;i.indexOf(b)>-1&&i.indexOf(C)>-1?d="return vec4(outputValue.x);":i.indexOf(b)>-1?d="return vec4(outputValue.x, outputValue.y, outputValue.x, outputValue.y);":i.indexOf(C)>-1&&(d="return vec4(outputValue.xx, outputValue.zz);")}return`
2022-11-18 17:13:29 +01:00
vec4 ${n}() {
${p} coords = getOutputCoords();
${c}
vec4 outputValue = get${o}(${m});
2022-11-20 22:20:02 +01:00
${d}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
`}function V8(r,e){let t=r.name,o=t.charAt(0).toUpperCase()+t.slice(1),n="get"+o+"AtOutCoords",s=e.texShape,a=r.shapeInfo.texShape,i=r.shapeInfo.logicalShape.length,p=e.logicalShape.length;if(!r.shapeInfo.isUniform&&i===p&&r.shapeInfo.flatOffset==null&&y.arraysEqual(a,s))return`
2022-11-18 17:13:29 +01:00
float ${n}() {
return sampleTexture(${t}, resultUV);
}
2023-01-06 19:23:06 +01:00
`;let u=$e(p),c=R$(r.shapeInfo.logicalShape,e.logicalShape),l=p-i,m,d=["x","y","z","w","u","v"];i===0?m="":p<2&&c.length>=1?m="coords = 0;":m=c.map(h=>`coords.${d[h+l]} = 0;`).join(`
2022-11-20 22:20:02 +01:00
`);let f="";return p<2&&i>0?f="coords":f=r.shapeInfo.logicalShape.map((h,g)=>`coords.${d[g+l]}`).join(", "),`
2022-11-18 17:13:29 +01:00
float ${n}() {
${u} coords = getOutputCoords();
${m}
2022-11-20 22:20:02 +01:00
return get${o}(${f});
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
`}function $e(r){if(r<=1)return"int";if(r===2)return"ivec2";if(r===3)return"ivec3";if(r===4)return"ivec4";if(r===5)return"ivec5";if(r===6)return"ivec6";throw Error(`GPU for rank ${r} is not yet supported`)}function Of(r,e,t){let{newShape:o,keptDims:n}=y.squeezeShape(e),s=e.length,a=r&&s===3&&e[0]===1,i=a?e.slice(1):o,p=!r&&s>1&&!y.arraysEqual(e,t)&&o.length<s||a;return{useSqueezeShape:p,uniformShape:p?i:e,keptDims:n}}function cc(r,e){let t=JSON.parse(JSON.stringify(r));return t.shapeInfo.logicalShape=e,t}function lc(r,e){return e.map(t=>r[t]).join(", ")}function M$(r,e,t,o){let n=t.map((c,l)=>{let m={logicalShape:c.shape,texShape:c.isUniform?null:c.texData.texShape,isUniform:c.isUniform,isPacked:c.isUniform?!1:c.texData.isPacked,flatOffset:null};return c.texData!=null&&c.texData.slice!=null&&c.texData.slice.flatOffset>0&&(m.flatOffset=c.texData.slice.flatOffset),{name:e.variableNames[l],shapeInfo:m}}),s=n.map(c=>c.shapeInfo),a={logicalShape:o.shape,texShape:o.texData.texShape,isUniform:!1,isPacked:o.texData.isPacked,flatOffset:null},i=D$(n,a,e),p=jS(r.gl,i),u=r.createProgram(p);return O().get("ENGINE_COMPILE_ONLY")?{program:e,fragmentShader:p,source:i,webGLProgram:u,inShapeInfos:s,outShapeInfo:a,uniformLocations:null,customUniformLocations:null,infLoc:null,nanLoc:null,inShapesLocations:null,inTexShapesLocations:null,outShapeLocation:null,outShapeStridesLocation:null,outTexShapeLocation:null}:Object.assign({program:e,fragmentShader:p,source:i,webGLProgram:u,inShapeInfos:s,outShapeInfo:a},mw(r,e,u))}function mw(r,e,t){let o={},n={},s={},a=[],i,p,u,c=null,l=null;l=r.getUniformLocation(t,"NAN",!1),O().getNumber("WEBGL_VERSION")===1&&(c=r.getUniformLocation(t,"INFINITY",!1));let m=!1;for(let d=0;d<e.variableNames.length;d++){let 
f=e.variableNames[d];o[f]=r.getUniformLocation(t,f,m),o[`offset${f}`]=r.getUniformLocation(t,`offset${f}`,m),e.enableShapeUniforms&&(n[`${f}Shape`]=r.getUniformLocation(t,`${f}Shape`,m),s[`${f}TexShape`]=r.getUniformLocation(t,`${f}TexShape`,m))}return e.enableShapeUniforms&&(i=r.getUniformLocation(t,"outShape",m),u=r.getUniformLocation(t,"outShapeStrides",m),p=r.getUniformLocation(t,"outTexShape",m)),e.customUniforms&&e.customUniforms.forEach((d,f)=>{a[f]=r.getUniformLocation(t,d.name,m)}),{uniformLocations:o,customUniformLocations:a,infLoc:c,nanLoc:l,inShapesLocations:n,inTexShapesLocations:s,outShapeLocation:i,outShapeStridesLocation:u,outTexShapeLocation:p}}function P$(r,e){if(r.length!==e.length)throw Error(`Binary was compiled with ${r.length} inputs, but was executed with ${e.length} inputs`);r.forEach((t,o)=>{let n=t.logicalShape,s=e[o],a=s.shape;if(!y.arraysEqual(n,a))throw Error(`Binary was compiled with different shapes than the current args. Shapes ${n} and ${a} must match`);if(t.isUniform&&s.isUniform)return;let i=t.texShape,p=s.isUniform?null:s.texData.texShape;if(!y.arraysEqual(i,p))throw Error(`Binary was compiled with different texture shapes than the current args. 
Shape ${i} and ${p} must match`)})}function L$(r,e,t,o,n){e.program.enableShapeUniforms||(P$(e.inShapeInfos,t),P$([e.outShapeInfo],[o]));let s=o.texData.texture,a=o.texData.texShape;o.texData.isPacked?r.setOutputPackedMatrixTexture(s.texture,a[0],a[1]):r.setOutputMatrixTexture(s.texture,a[0],a[1]),r.setProgram(e.webGLProgram),O().getNumber("WEBGL_VERSION")===1&&e.infLoc!==null&&r.gl.uniform1f(e.infLoc,1/0),e.nanLoc!==null&&r.gl.uniform1f(e.nanLoc,NaN),t.forEach((p,u)=>{let c=e.program.variableNames[u],l=e.uniformLocations[c],m=e.uniformLocations[`offset${c}`],d=e.inShapesLocations[`${c}Shape`],f=e.inTexShapesLocations[`${c}TexShape`];if(d){let{uniformShape:h}=Of(e.program.packedInputs,p.shape,p.texData.texShape);switch(h.length){case 1:r.gl.uniform1iv(d,new Int32Array(h));break;case 2:r.gl.uniform2iv(d,new Int32Array(h));break;case 3:r.gl.uniform3iv(d,new Int32Array(h));break;case 4:r.gl.uniform4iv(d,new Int32Array(h));break;default:break}}if(f&&r.gl.uniform2i(f,p.texData.texShape[0],p.texData.texShape[1]),l!=null){if(p.isUniform){if(y.sizeFromShape(p.shape)<2)r.gl.uniform1f(l,p.uniformValues[0]);else{let h=p.uniformValu
2022-11-18 17:13:29 +01:00
ivec3 outCoordsFromFlatIndex(int index) {
2023-01-06 19:23:06 +01:00
${this.enableShapeUniforms?Pu(["r","c","d"],e):ps(["r","c","d"],e)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
void main() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(texShape[0], texShape[1]));
int index = 4 * (resTexRC.x * texShape[1] + resTexRC.y);
vec4 result = vec4(0.);
for (int i=0; i<4; i++) {
int flatIndex = index + i;
ivec3 rc = outCoordsFromFlatIndex(flatIndex);
result[i] = getA(rc.x, rc.y, rc.z);
}
${t.output} = result;
}
2023-01-06 19:23:06 +01:00
`}};var Mf=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outPackingScheme=Li.DENSE,this.customUniforms=[{name:"texShape",type:"ivec2"}];let t=wt();this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
ivec3 outCoordsFromFlatIndex(int index) {
2023-01-06 19:23:06 +01:00
${this.enableShapeUniforms?Pu(["r","c","d"],e):ps(["r","c","d"],e)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
void main() {
ivec2 resTexRC = ivec2(resultUV.yx * vec2(texShape[0], texShape[1]));
int index = 4 * (resTexRC.x * texShape[1] + resTexRC.y);
vec4 result = vec4(0.);
for (int i=0; i<4; i++) {
int flatIndex = index + i;
ivec3 rc = outCoordsFromFlatIndex(flatIndex);
result[i] = getChannel(getA(rc.x, rc.y, rc.z), vec2(rc.y, rc.z));
}
${t.output} = result;
}
2023-01-06 19:23:06 +01:00
`}};var Lf=class{constructor(e){this.variableNames=["A"],this.outTexUsage=ur.DOWNLOAD;let t=wt();this.outputShape=e,this.userCode=`
${Ff}
2022-11-18 17:13:29 +01:00
void main() {
float x = getAAtOutCoords();
${t.output} = encode_float(x);
}
2023-01-06 19:23:06 +01:00
`}};var Bf=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!1,this.outTexUsage=ur.DOWNLOAD;let t=wt();this.outputShape=e,this.userCode=`
${Ff}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
float x = getChannel(getAAtOutCoords(), vec2(coords.y, coords.z));
${t.output} = encode_float(x);
}
2023-01-06 19:23:06 +01:00
`}};var U8={R:0,G:1,B:2,A:3},Tl=class{constructor(e,t=!1,o="RGBA"){this.variableNames=["A"],this.customUniforms=[{name:"texShape",type:"ivec2"}];let n=wt();this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let s="result";t&&(s="floor(result * 255. + 0.5)");let a="";for(let i=0;i<o.length;i++){let p=o[i];a+=`
2022-11-18 17:13:29 +01:00
if(offset == ${i}) {
2023-01-06 19:23:06 +01:00
result = values[${U8[p]}];
2022-11-18 17:13:29 +01:00
}`}this.userCode=`
2023-01-06 19:23:06 +01:00
${this.enableShapeUniforms?ac():sc(e)}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
int flatIndex = getFlatIndex(coords);
float result = 0.;
int offset = imod(flatIndex, ${o.length});
flatIndex = idiv(flatIndex, ${o.length}, 1.);
int r = flatIndex / texShape[1];
if (r < texShape[0]) {
int c = imod(flatIndex, texShape[1]);
vec2 uv = (vec2(c, r) + halfCR) / vec2(texShape[1], texShape[0]);
vec4 values = ${n.texture2D}(A, uv);
${a}
}
${n.output} = vec4(${s}, 0., 0., 0.);
}
2023-01-06 19:23:06 +01:00
`}};var Vf=class{constructor(e,t=!1){this.variableNames=["A"],this.packedInputs=!1,this.packedOutput=!0,this.customUniforms=[{name:"texShape",type:"ivec2"}];let o=wt();this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let n="",s="result";t&&(s="floor(result * 255. + 0.5)");for(let a=0;a<=1;a++)for(let i=0;i<=1;i++){let p=a*2+i;n+=`
2022-11-18 17:13:29 +01:00
localCoords = coords;
if(localCoords[2] + ${i} < ${this.enableShapeUniforms?"outShape[2]":`${e[2]}`}) {
localCoords[2] += ${i};
if (localCoords[1] + ${a} < ${this.enableShapeUniforms?"outShape[1]":`${e[1]}`}) {
localCoords[1] += ${a};
flatIndex = getFlatIndex(localCoords);
offset = imod(flatIndex, 4);
flatIndex = idiv(flatIndex, 4, 1.);
int r = flatIndex / texShape[1];
int c = imod(flatIndex, texShape[1]);
vec2 uv = (vec2(c, r) + halfCR) / vec2(texShape[1], texShape[0]);
values = ${o.texture2D}(A, uv);
if (offset == 0) {
result[${p}] = values[0];
} else if (offset == 1) {
result[${p}] = values[1];
} else if (offset == 2) {
result[${p}] = values[2];
} else {
result[${p}] = values[3];
}
}
}
`}this.userCode=`
2023-01-06 19:23:06 +01:00
${this.enableShapeUniforms?ac():sc(e)}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
vec4 result = vec4(0.);
int flatIndex, r, c, offset;
ivec3 localCoords;
vec2 uv;
vec4 values;
${n}
${o.output} = ${s};
}
2023-01-06 19:23:06 +01:00
`}};var $w={};Ge($w,{bindVertexProgramAttributeStreams:()=>Sw,createBufferFromOutputTexture:()=>vw,createFloat16MatrixTexture:()=>xw,createFloat16PackedMatrixTexture:()=>Cw,createFloat32MatrixTexture:()=>gw,createIndexBuffer:()=>hw,createPackedMatrixTexture:()=>bw,createUnsignedBytesMatrixTexture:()=>yw,createVertexBuffer:()=>fw,createVertexShader:()=>dw,downloadByteEncodedFloatMatrixFromOutputTexture:()=>Nw,downloadFloat32MatrixFromBuffer:()=>kw,downloadMatrixFromPackedOutputTexture:()=>_w,downloadPackedMatrixFromBuffer:()=>Tw,getInternalFormatForFloat16MatrixTexture:()=>Wf,getInternalFormatForFloat16PackedMatrixTexture:()=>Hf,getInternalFormatForFloat32MatrixTexture:()=>zf,getInternalFormatForPackedMatrixTexture:()=>Gf,getInternalFormatForUnsignedBytesMatrixTexture:()=>Uf,uploadDenseMatrixToTexture:()=>ww,uploadPixelDataToTexture:()=>Iw});function dw(r){let e=wt(),t=`${e.version}
2022-11-18 17:13:29 +01:00
precision highp float;
${e.attribute} vec3 clipSpacePos;
${e.attribute} vec2 uv;
${e.varyingVs} vec2 resultUV;
void main() {
gl_Position = vec4(clipSpacePos, 1);
resultUV = uv;
2023-01-06 19:23:06 +01:00
}`;return qS(r,t)}function fw(r){let e=new Float32Array([-1,1,0,0,1,-1,-1,0,0,0,1,1,0,1,1,1,-1,0,1,0]);return QS(r,e)}function hw(r){let e=new Uint16Array([0,1,2,2,1,3]);return ZS(r,e)}function _l(r,e,t,o,n,s){ew(e,t);let a=JS(r),i=r.TEXTURE_2D;return pe(r,()=>r.bindTexture(i,a)),pe(r,()=>r.texParameteri(i,r.TEXTURE_WRAP_S,r.CLAMP_TO_EDGE)),pe(r,()=>r.texParameteri(i,r.TEXTURE_WRAP_T,r.CLAMP_TO_EDGE)),pe(r,()=>r.texParameteri(i,r.TEXTURE_MIN_FILTER,r.NEAREST)),pe(r,()=>r.texParameteri(i,r.TEXTURE_MAG_FILTER,r.NEAREST)),O().getNumber("WEBGL_VERSION")===1?pe(r,()=>r.texImage2D(i,0,o,e,t,0,n,s,null)):pe(r,()=>r.texStorage2D(i,1,o,e,t)),pe(r,()=>r.bindTexture(r.TEXTURE_2D,null)),{texture:a,texShape:[t,e]}}function zf(r){return r.internalFormatFloat}function gw(r,e,t,o){let[n,s]=Ou(e,t);return _l(r,n,s,zf(o),o.textureFormatFloat,r.FLOAT)}function Wf(r){return r.internalFormatHalfFloat}function xw(r,e,t,o){let[n,s]=Ou(e,t);return _l(r,n,s,Wf(o),o.textureFormatFloat,o.textureTypeHalfFloat)}function Uf(r){return r.downloadTextureFormat}function yw(r,e,t,o){let[n,s]=Ou(e,t);return _l(r,n,s,Uf(o),r.RGBA,r.UNSIGNED_BYTE)}function Gf(r){return r.internalFormatPackedFloat}function bw(r,e,t,o){let[n,s]=Qs(e,t);return _l(r,n,s,Gf(o),r.RGBA,r.FLOAT)}function Hf(r){return r.internalFormatPackedHalfFloat}function Cw(r,e,t,o){let[n,s]=Qs(e,t);return _l(r,n,s,Hf(o),r.RGBA,o.textureTypeHalfFloat)}function Sw(r,e,t){return pe(r,()=>r.bindBuffer(r.ARRAY_BUFFER,t)),Af(r,e,"clipSpacePos",t,3,20,0)&&Af(r,e,"uv",t,2,20,12)}function ww(r,e,t,o,n,s){pe(r,()=>r.bindTexture(r.TEXTURE_2D,e));let a,i,p;n instanceof Uint8Array?(a=new Uint8Array(t*o*4),i=r.UNSIGNED_BYTE,p=r.RGBA):(a=new Float32Array(t*o*4),i=r.FLOAT,p=s.internalFormatPackedFloat),a.set(n),O().getNumber("WEBGL_VERSION")===2?pe(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,t,o,r.RGBA,i,a)):pe(r,()=>r.texImage2D(r.TEXTURE_2D,0,p,t,o,0,r.RGBA,i,a)),pe(r,()=>r.bindTexture(r.TEXTURE_2D,null))}function 
Iw(r,e,t){pe(r,()=>r.bindTexture(r.TEXTURE_2D,e)),t.data instanceof Uint8Array?O().getNumber("WEBGL_VERSION")===2?pe(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,t.width,t.height,r.RGBA,r.UNSIGNED_BYTE,t.data)):pe(r,()=>r.texImage2D(r.TEXTURE_2D,0,r.RGBA,t.width,t.height,0,r.RGBA,r.UNSIGNED_BYTE,t.data)):O().getNumber("WEBGL_VERSION")===2?pe(r,()=>r.texSubImage2D(r.TEXTURE_2D,0,0,0,r.RGBA,r.UNSIGNED_BYTE,t)):pe(r,()=>r.texImage2D(r.TEXTURE_2D,0,r.RGBA,r.RGBA,r.UNSIGNED_BYTE,t)),pe(r,()=>r.bindTexture(r.TEXTURE_2D,null))}function vw(r,e,t,o){let n=r.createBuffer();pe(r,()=>r.bindBuffer(r.PIXEL_PACK_BUFFER,n));let i=4*4*e*t;return pe(r,()=>r.bufferData(r.PIXEL_PACK_BUFFER,i,r.STREAM_READ)),pe(r,()=>r.readPixels(0,0,t,e,r.RGBA,r.FLOAT,0)),pe(r,()=>r.bindBuffer(r.PIXEL_PACK_BUFFER,null)),n}function kw(r,e,t){let o=r,n=new Float32Array(t);return o.bindBuffer(o.PIXEL_PACK_BUFFER,e),o.getBufferSubData(o.PIXEL_PACK_BUFFER,0,n),o.bindBuffer(o.PIXEL_PACK_BUFFER,null),n}function Nw(r,e,t,o){let[n,s]=Ou(e,t),a=4,i=new Uint8Array(k$(e*t,a));return pe(r,()=>r.readPixels(0,0,n,s,o.downloadTextureFormat,r.UNSIGNED_BYTE,i)),new Float32Array(i.buffer)}function Tw(r,e,t,o,n,s,a,i){let p=r,u=new Float32Array(N$(s,a));return p.bindBuffer(p.PIXEL_PACK_BUFFER,e),p.getBufferSubData(p.PIXEL_PACK_BUFFER,0,u),p.bindBuffer(p.PIXEL_PACK_BUFFER,null),u}function _w(r,e,t){let o=new Float32Array(e*t*4);return pe(r,()=>r.readPixels(0,0,t,e,r.RGBA,r.FLOAT,o)),o}var Lu=class{constructor(e){this.outputTexture=null,this.program=null,this.disposed=!1,this.itemsToPoll=[];let t=O().getNumber("WEBGL_VERSION");if(e!=null?(this.gl=e,US(t,e)):this.gl=zr(t),e=this.gl,O().getNumber("WEBGL_VERSION")===2){let s=e;this.createVertexArray=()=>pe(s,()=>s.createVertexArray()),this.bindVertexArray=a=>pe(s,()=>s.bindVertexArray(a)),this.deleteVertexArray=a=>pe(s,()=>s.deleteVertexArray(a)),this.getVertexArray=()=>pe(s,()=>s.getParameter(s.VERTEX_ARRAY_BINDING))}else if(e!=null){let 
s=e.getExtension("OES_vertex_array_object");if(s==null)throw new Error("All WebGL1 implementations are expected to offer OES_vertex_array_object.");this.createVertexArray=()=>p
2022-11-18 17:13:29 +01:00
void main() {
setOutput(vec4(getA(), 0., 0., 0.));
}
2023-01-06 19:23:06 +01:00
`;else{let t=Et("rc",this.rank),o=$e(this.rank),n=this.getOutOfBoundsCondition(t),s=this.getSetup(t),a=this.getOutput(t);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o} rc = getOutputCoords();
if(${n}) {
setOutput(vec4(0));
} else {
${s}
setOutput(vec4(${a}));
}
}
`}}getSourceCoordsArr(e){let t=[];for(let o=0;o<=1;o++)for(let n=0;n<=1;n++){let s=`${o===0?"r":"rp1"}, ${n===0?"c":"cp1"}`;for(let a=2;a<this.rank;a++)s=`${e[e.length-1-a]},`+s;t.push(s)}return t}getOutOfBoundsCondition(e){if(this.rank===1)return`rc > ${this.enableShapeUniforms?"outShape":this.outputShape[0]}`;let t="";for(let o=this.rank-2;o<this.rank;o++)t+=`${e[o]} >= ${this.enableShapeUniforms?`outShape[${o}]`:this.outputShape[o]}`,o<this.rank-1&&(t+="||");return t}getSetup(e){if(this.rank===1)return"";let t=e.slice(-2),o=this.enableShapeUniforms?`outShape[${this.rank} - 1]`:this.outputShape[this.rank-1],n=this.enableShapeUniforms?`outShape[${this.rank} - 2]`:this.outputShape[this.rank-2];return`
int r = ${t[0]};
int c = ${t[1]};
int rp1 = r + 1;
int cp1 = c + 1;
bool cEdge = cp1 >= ${o};
bool rEdge = rp1 >= ${n};
`}getOutput(e){let t=this.getSourceCoordsArr(e);return this.rank===1?`getA(rc), (rc + 1 >= ${this.enableShapeUniforms?"outShape":this.outputShape[0]} ? 0. : getA(rc + 1)), 0, 0`:`getA(${t[0]}),
cEdge ? 0. : getA(${t[1]}),
rEdge ? 0. : getA(${t[2]}),
2023-01-06 19:23:06 +01:00
rEdge || cEdge ? 0. : getA(${t[3]})`}};var mc=class{constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"inputShape",type:"ivec3"}],this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let o="";for(let n=0;n<4;n++){let s="thisRC = rc;";n%2===1&&(s+="thisRC.z += 1;"),n>1&&(s+="thisRC.y += 1;"),o+=`
2022-11-18 17:13:29 +01:00
${s}
${n>0?"if(thisRC.y < rows && thisRC.z < cols){":""}
int flatIndex = getFlatIndex(thisRC);
ivec3 inputRC = inputCoordsFromReshapedOutCoords(flatIndex);
vec2 inputRCInnerDims = vec2(float(inputRC.y),float(inputRC.z));
result[${n}] =
getChannel(getA(inputRC.x, inputRC.y, inputRC.z), inputRCInnerDims);
${n>0?"}":""}
`}this.userCode=`
2023-01-06 19:23:06 +01:00
${H8(t,this.enableShapeUniforms)}
${this.enableShapeUniforms?ac():sc(e)}
2022-11-18 17:13:29 +01:00
void main() {
ivec3 rc = getOutputCoords();
vec4 result = vec4(0.);
ivec3 thisRC;
int rows = ${this.enableShapeUniforms?"outShape[1]":e[1]};
int cols = ${this.enableShapeUniforms?"outShape[2]":e[2]};
${o}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};function H8(r,e){return`
2022-11-18 17:13:29 +01:00
ivec3 inputCoordsFromReshapedOutCoords(int index) {
2023-01-06 19:23:06 +01:00
${e?A$(["r","c","d"],"inputShape"):ps(["r","c","d"],r)}
2022-11-18 17:13:29 +01:00
return ivec3(r, c, d);
}
2023-01-06 19:23:06 +01:00
`}var Yf=class{constructor(e){this.gpgpu=e,this.numUsedTextures=0,this.numFreeTextures=0,this._numBytesAllocated=0,this._numBytesFree=0,this.freeTextures={},this.logEnabled=!1,this.usedTextures={}}acquireTexture(e,t,o){let n=AE(t,o),s=RE(e,n,o);s in this.freeTextures||(this.freeTextures[s]=[]),s in this.usedTextures||(this.usedTextures[s]=[]);let a=EE(e,n,this.gpgpu.gl,this.gpgpu.textureConfig,o);if(this.freeTextures[s].length>0){this.numFreeTextures--,this.numUsedTextures++,this._numBytesFree-=a,this.log();let p=this.freeTextures[s].shift();return this.usedTextures[s].push(p),p}let i;return n===Zt.PACKED_2X2_FLOAT32?i=this.gpgpu.createPackedMatrixTexture(e[0],e[1]):n===Zt.PACKED_2X2_FLOAT16?i=this.gpgpu.createFloat16PackedMatrixTexture(e[0],e[1]):n===Zt.UNPACKED_FLOAT32?i=this.gpgpu.createFloat32MatrixTexture(e[0],e[1]):n===Zt.UNPACKED_FLOAT16?i=this.gpgpu.createFloat16MatrixTexture(e[0],e[1]):n===Zt.PACKED_4X1_UNSIGNED_BYTE&&(i=this.gpgpu.createUnsignedBytesMatrixTexture(e[0],e[1])),this.usedTextures[s].push(i),this.numUsedTextures++,this._numBytesAllocated+=a,this.log(),i}releaseTexture(e,t,o,n){if(this.freeTextures==null)return;let s=AE(o,n),a=RE(t,s,n);a in this.freeTextures||(this.freeTextures[a]=[]);let i=EE(t,s,this.gpgpu.gl,this.gpgpu.textureConfig,n),p=O().get("WEBGL_DELETE_TEXTURE_THRESHOLD");p!==-1&&this._numBytesAllocated>p?(this.gpgpu.deleteMatrixTexture(e.texture),this._numBytesAllocated-=i):(this.freeTextures[a].push(e),this.numFreeTextures++,this._numBytesFree+=i),this.numUsedTextures--;let u=this.usedTextures[a],c=u.indexOf(e);if(c<0)throw new Error("Cannot release a texture that was never provided by this texture manager");u.splice(c,1),this.log()}log(){if(!this.logEnabled)return;let e=this.numFreeTextures+this.numUsedTextures;console.log("Free/Used",`${this.numFreeTextures} / ${this.numUsedTextures}`,`(${e})`);let t=this._numBytesFree/this._numBytesAllocated;console.log(`Bytes allocated: ${this._numBytesAllocated}`),console.log(`Bytes unused: 
${this._numBytesFree} (${Math.round(100*t)}%)`)}get numBytesAllocated(){return this._numBytesAllocated}get numBytesFree(){return this._numBytesFree}getNumUsedTextures(){return this.numUsedTextures}getNumFreeTextures(){return this.numFreeTextures}dispose(){if(this.freeTextures!=null){for(let e in this.freeTextures)this.freeTextures[e].forEach(t=>{this.gpgpu.deleteMatrixTexture(t.texture)});for(let e in this.usedTextures)this.usedTextures[e].forEach(t=>{this.gpgpu.deleteMatrixTexture(t.texture)});this.freeTextures=null,this.usedTextures=null,this.numUsedTextures=0,this.numFreeTextures=0,this._numBytesAllocated=0,this._numBytesFree=0}}};function K8(r,e){let t=r;if(e===t.R32F)return 4;if(e===t.R16F)return 2;if(e===t.RGBA32F)return 16;if(e===r.RGBA)return 16;if(e===t.RGBA16F)return 8;if(e===t.RGBA8)return 4;throw new Error(`Unknown internal format ${e}`)}function EE(r,e,t,o,n){let s=q8(e,o),a;if(n){let[p,u]=Qs(r[0],r[1]);a=p*u}else{let[p,u]=Ou(r[0],r[1]);a=p*u}let i=K8(t,s);return a*i}function q8(r,e){switch(r){case Zt.PACKED_2X2_FLOAT32:return Gf(e);case Zt.PACKED_2X2_FLOAT16:return Hf(e);case Zt.UNPACKED_FLOAT32:return zf(e);case Zt.UNPACKED_FLOAT16:return Wf(e);case Zt.PACKED_4X1_UNSIGNED_BYTE:return Uf(e);default:throw new Error(`Unknown physical texture type ${r}`)}}function j8(r){return O().getBool("WEBGL_RENDER_FLOAT32_ENABLED")?r?Zt.PACKED_2X2_FLOAT32:Zt.UNPACKED_FLOAT32:r?Zt.PACKED_2X2_FLOAT16:Zt.UNPACKED_FLOAT16}function AE(r,e){if(r===ur.UPLOAD)return Zt.PACKED_2X2_FLOAT32;if(r===ur.RENDER||r==null)return j8(e);if(r===ur.DOWNLOAD||r===ur.PIXELS)return Zt.PACKED_4X1_UNSIGNED_BYTE;throw new Error(`Unknown logical texture type ${r}`)}function RE(r,e,t){return`${r[0]}_${r[1]}_${e}_${t}`}var Jt=class{constructor(e,t){this.variableNames=["A"],this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
float unaryOperation(float x) {
${t}
}
void main() {
float x = getAAtOutCoords();
float y = unaryOperation(x);
setOutput(y);
}
2023-01-06 19:23:06 +01:00
`}},zt="if (isnan(x)) return x;",DE="return x;",Aw="return abs(x);";var FE="return (x >= 0.0) ? x : (exp(x) - 1.0);",OE=zt+`
2022-11-18 17:13:29 +01:00
return (x < 0.0) ? 0.0 : x;
2023-01-06 19:23:06 +01:00
`,PE=zt+`
2022-11-18 17:13:29 +01:00
return (x < 0.0) ? 0.0 : min(6.0, x);
2023-01-06 19:23:06 +01:00
`,Zs="return x;",ME="return 1.0 / (1.0 + exp(-1.0 * x));";var BE="return x;",VE=`
2022-11-18 17:13:29 +01:00
vec4 result;
result.r = (x.r >= 0.0) ? x.r : (exp(x.r) - 1.0);
result.g = (x.g >= 0.0) ? x.g : (exp(x.g) - 1.0);
result.b = (x.b >= 0.0) ? x.b : (exp(x.b) - 1.0);
result.a = (x.a >= 0.0) ? x.a : (exp(x.a) - 1.0);
return result;
2023-01-06 19:23:06 +01:00
`,zE=`
2022-11-18 17:13:29 +01:00
vec4 result = x * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
2023-01-06 19:23:06 +01:00
`,WE=`
2022-11-18 17:13:29 +01:00
vec4 result = min(x, vec4(6.)) * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
2023-01-06 19:23:06 +01:00
`,UE="return 1.0 / (1.0 + exp(-1.0 * x));",Er=class{constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
vec4 unaryOperation(vec4 x) {
${t}
}
void main() {
vec4 x = getAAtOutCoords();
vec4 y = unaryOperation(x);
setOutput(y);
}
2023-01-06 19:23:06 +01:00
`}};var Qf=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!1,this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let t=e.length,o=Et("rc",t),n=$e(t),s=$E(t,o),a=o.slice(-2),i=t<=1?"rc":`vec2(${a.join(",")})`;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${n} rc = getOutputCoords();
vec4 packedInput = getA(${s});
setOutput(getChannel(packedInput, ${i}));
}
2023-01-06 19:23:06 +01:00
`}};var Y8=Vt.whereImpl,Q8=1e-7,Z8=1e-4,Zf={};function J8(r){return r in Zf||(Zf[r]={}),Zf[r]}var eY=O().getNumber("CPU_HANDOFF_SIZE_THRESHOLD"),tY=600;function rY(){return O().global.screen==null?1024:O().global.screen.height*O().global.screen.width*window.devicePixelRatio*tY/1024/1024}var Vi=class extends Zr{constructor(e){if(super(),this.pendingRead=new WeakMap,this.pendingDisposal=new WeakSet,this.dataRefCount=new WeakMap,this.numBytesInGPU=0,this.uploadWaitMs=0,this.downloadWaitMs=0,this.lastGlFlushTime=0,this.warnedAboutMemory=!1,this.pendingDeletes=0,this.disposed=!1,!O().getBool("HAS_WEBGL"))throw new Error("WebGL is not supported on this device");let t;if(e!=null){if(e instanceof Lu)t=e;else{let o=zr(O().getNumber("WEBGL_VERSION"),e);t=new Lu(o)}this.binaryCache={},this.gpgpuCreatedLocally=!1}else{let o=zr(O().getNumber("WEBGL_VERSION"));t=new Lu(o),this.binaryCache=J8(O().getNumber("WEBGL_VERSION")),this.gpgpuCreatedLocally=!0}this.gpgpu=t,this.canvas=this.gpgpu.gl.canvas,this.textureManager=new Yf(this.gpgpu),this.numMBBeforeWarning=rY(),this.texData=new Fo(this,sr())}nextDataId(){return Vi.nextDataId++}numDataIds(){return this.texData.numDataIds()-this.pendingDeletes}writeTexture(e,t,o,n,s,a){let i=this.makeTensorInfo(t,o),p=this.texData.get(i.dataId);p.isPacked=!1,p.texture={texture:e,texShape:[n,s]},p.texShape=[n,s];let u=oc(t),c=new Tl(u,!1,a),l=this.runWebGLProgram(c,[i],o,[[n,s]]);return l.shape=t,p.texture=null,this.disposeIntermediateTensorInfo(i),l.dataId}write(e,t,o){if((O().getBool("WEBGL_CHECK_NUMERICAL_PROBLEMS")||O().getBool("DEBUG"))&&this.checkNumericalProblems(e),o==="complex64"&&e!=null)throw new Error("Cannot write to a complex64 dtype. 
Please use tf.complex(real, imag).");let n={id:this.nextDataId()};return this.texData.set(n,{shape:t,dtype:o,values:e,usage:ur.UPLOAD,refCount:1}),n}refCount(e){return this.texData.has(e)?this.texData.get(e).refCount:0}incRef(e){let t=this.texData.get(e);t.refCount++}decRef(e){if(this.texData.has(e)){let t=this.texData.get(e);t.refCount--}}move(e,t,o,n,s){if(O().getBool("DEBUG")&&this.checkNumericalProblems(t),n==="complex64")throw new Error("Cannot write to a complex64 dtype. Please use tf.complex(real, imag).");this.texData.set(e,{shape:o,dtype:n,values:t,usage:ur.UPLOAD,refCount:s})}disposeIntermediateTensorInfo(e){this.disposeData(e.dataId)}readSync(e){let t=this.texData.get(e),{values:o,dtype:n,complexTensorInfos:s,slice:a,shape:i,isPacked:p}=t;if(a!=null){let m;p?m=new Er(i,Zs):m=new Jt(i,Zs);let d=this.runWebGLProgram(m,[{dataId:e,shape:i,dtype:n}],n),f=this.readSync(d.dataId);return this.disposeIntermediateTensorInfo(d),f}if(o!=null)return this.convertAndCacheOnCPU(e);if(n==="string")return o;let u=this.activeTimers!=null,c;u&&(c=y.now());let l;if(n==="complex64"){let m=this.readSync(s.real.dataId),d=this.readSync(s.imag.dataId);l=S.mergeRealAndImagArrays(m,d)}else l=this.getValuesFromTexture(e);return u&&(this.downloadWaitMs+=y.now()-c),this.convertAndCacheOnCPU(e,l)}async read(e){if(this.pendingRead.has(e)){let f=this.pendingRead.get(e);return new Promise(h=>f.push(h))}let t=this.texData.get(e),{values:o,shape:n,slice:s,dtype:a,complexTensorInfos:i,isPacked:p}=t;if(s!=null){let f;p?f=new Er(n,Zs):f=new Jt(n,Zs);let h=this.runWebGLProgram(f,[{dataId:e,shape:n,dtype:a}],a),g=this.read(h.dataId);return this.disposeIntermediateTensorInfo(h),g}if(o!=null)return this.convertAndCacheOnCPU(e);if(O().getBool("DEBUG")&&!O().getBool("WEBGL_DOWNLOAD_FLOAT_ENABLED")&&O().getNumber("WEBGL_VERSION")===2)throw new Error("tensor.data() with WEBGL_DOWNLOAD_FLOAT_ENABLED=false and WEBGL_VERSION=2 not yet supported.");let 
u=null,c;if(a!=="complex64"&&O().get("WEBGL_BUFFER_SUPPORTED")){c=this.decode(e);let f=this.texData.get(c.dataId);u=this.gpgpu.createBufferFromTexture(f.texture.texture,...Il(n))}this.pendingRead.set(e,[]),a!=="complex64"&&await this.gpgpu.createAndWaitForFence();let l;if(a==="complex64"){let f=await Promise.all([this.read(i.real.dataId),this.read(i.imag.dataId)]),h=f[0],g=f[1];l=S.mergeRealAndImagArrays(h,g)}else if(u==null)l=this.getValuesFromT
2022-11-18 17:13:29 +01:00
if (isnan(a)) return a;
if (isnan(b)) return b;
2023-01-06 19:23:06 +01:00
`;var io=class{constructor(e,t,o){this.variableNames=["A","B"],this.outputShape=S.assertAndGetBroadcastShape(t,o),this.enableShapeUniforms=lt(this.outputShape.length),this.userCode=`
2022-11-18 17:13:29 +01:00
float binaryOperation(float a, float b) {
${e}
}
void main() {
float a = getAAtOutCoords();
float b = getBAtOutCoords();
setOutput(binaryOperation(a, b));
}
2023-01-06 19:23:06 +01:00
`}};var Js=`
2022-11-18 17:13:29 +01:00
result.r = isNaN.r ? NAN : result.r;
result.g = isNaN.g ? NAN : result.g;
result.b = isNaN.b ? NAN : result.b;
result.a = isNaN.a ? NAN : result.a;
2023-01-06 19:23:06 +01:00
`;var _o=class{constructor(e,t,o,n=!1){this.variableNames=["A","B"],this.supportsBroadcasting=!0,this.packedInputs=!0,this.packedOutput=!0,this.outputShape=S.assertAndGetBroadcastShape(t,o);let s=this.outputShape.length;this.enableShapeUniforms=lt(s);let a="";if(n)if(s===0||y.sizeFromShape(this.outputShape)===1)a=`
2022-11-18 17:13:29 +01:00
result.y = 0.;
result.z = 0.;
result.w = 0.;
`;else if(a=`
2023-01-06 19:23:06 +01:00
${$e(s)} coords = getOutputCoords();
2022-11-18 17:13:29 +01:00
`,s===1)this.enableShapeUniforms?a+=`
result.y = (coords + 1) >= outShape ? 0. : result.y;
result.z = 0.;
result.w = 0.;
`:a+=`
result.y = (coords + 1) >= ${this.outputShape[0]} ? 0. : result.y;
result.z = 0.;
result.w = 0.;
2023-01-06 19:23:06 +01:00
`;else{let p=Et("coords",s);this.enableShapeUniforms?a+=`
2022-11-18 17:13:29 +01:00
bool nextRowOutOfBounds =
(${p[s-2]} + 1) >= outShape[${s} - 2];
bool nextColOutOfBounds =
(${p[s-1]} + 1) >= outShape[${s} - 1];
result.y = nextColOutOfBounds ? 0. : result.y;
result.z = nextRowOutOfBounds ? 0. : result.z;
result.w = nextColOutOfBounds || nextRowOutOfBounds ? 0. : result.w;
`:a+=`
bool nextRowOutOfBounds =
(${p[s-2]} + 1) >= ${this.outputShape[s-2]};
bool nextColOutOfBounds =
(${p[s-1]} + 1) >= ${this.outputShape[s-1]};
result.y = nextColOutOfBounds ? 0. : result.y;
result.z = nextRowOutOfBounds ? 0. : result.z;
result.w = nextColOutOfBounds || nextRowOutOfBounds ? 0. : result.w;
`}this.userCode=`
vec4 binaryOperation(vec4 a, vec4 b) {
${e}
}
void main() {
vec4 a = getAAtOutCoords();
vec4 b = getBAtOutCoords();
vec4 result = binaryOperation(a, b);
${a}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};function At(r){let{inputs:e,backend:t}=r,{x:o}=e;return t.incRef(o.dataId),{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}var HE={kernelName:mo,backendName:"webgl",kernelFunc:At};function Ar(r){let{inputs:e,backend:t}=r,{real:o,imag:n}=e,s=t.makeTensorInfo(o.shape,"complex64"),a=t.texData.get(s.dataId),i=At({inputs:{x:o},backend:t}),p=At({inputs:{x:n},backend:t});return a.complexTensorInfos={real:i,imag:p},s}var KE={kernelName:ri,backendName:"webgl",kernelFunc:Ar};var Rw="return (a < 0.) ? b * a : a;",Dw=`
2022-11-18 17:13:29 +01:00
vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
2023-01-06 19:23:06 +01:00
`;function sY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{alpha:s}=o,a=t.makeTensorInfo([],"float32",y.createScalarValue(s,"float32")),i=O().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new _o(Dw,n.shape,a.shape):new io(Rw,n.shape,a.shape),p=t.runWebGLProgram(i,[n,a],"float32");return t.disposeIntermediateTensorInfo(a),p}var qE={kernelName:ln,backendName:"webgl",kernelFunc:sY};var Fw="return (a < 0.) ? b * a : a;",Ow=`
2022-11-18 17:13:29 +01:00
vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
2023-01-06 19:23:06 +01:00
`;function aY(r){let{inputs:e,backend:t}=r,{x:o,alpha:n}=e,s=O().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new _o(Ow,o.shape,n.shape):new io(Fw,o.shape,n.shape);return t.runWebGLProgram(s,[o,n],"float32")}var jE={kernelName:Rn,backendName:"webgl",kernelFunc:aY};var $o="if (isnan(x)) return x;";function he({opSnippet:r,packedOpSnippet:e,cpuKernelImpl:t,dtype:o}){return({inputs:n,backend:s})=>{let{x:a}=n,i=s,p=o||a.dtype;if(i.shouldExecuteOnCPU([a])&&t!=null){let l=i.texData.get(a.dataId),m=t(l.values,p);return i.makeTensorInfo(a.shape,p,m)}let u=O().getBool("WEBGL_PACK_UNARY_OPERATIONS")&&e!=null,c;return u?c=new Er(a.shape,e):c=new Jt(a.shape,r),i.runWebGLProgram(c,[a],p)}}function tt({opSnippet:r,packedOpSnippet:e,checkOutOfBounds:t=!1,supportsComplex:o=!1,cpuKernelImpl:n,dtype:s}){return({inputs:a,backend:i})=>{let{a:p,b:u}=a,c=i;if(o&&p.dtype==="complex64"){let f=c.texData.get(p.dataId),h=c.texData.get(u.dataId),[g,x]=[[f.complexTensorInfos.real,h.complexTensorInfos.real],[f.complexTensorInfos.imag,h.complexTensorInfos.imag]].map(C=>{let[w,k]=C,_={dataId:w.dataId,dtype:w.dtype,shape:p.shape},E={dataId:k.dataId,dtype:k.dtype,shape:u.shape},A=new io(r,p.shape,u.shape);return c.runWebGLProgram(A,[_,E],dt(w.dtype,k.dtype))}),b=Ar({inputs:{real:g,imag:x},backend:c});return c.disposeIntermediateTensorInfo(g),c.disposeIntermediateTensorInfo(x),b}let l=s||dt(p.dtype,u.dtype);if((p.dtype==="string"||u.dtype==="string"||c.shouldExecuteOnCPU([p,u]))&&n!=null){let f=c.texData.get(p.dataId).values,h=c.texData.get(u.dataId).values,g=p.dtype==="string"?S.fromUint8ToStringArray(f):f,x=p.dtype==="string"?S.fromUint8ToStringArray(h):h,[b,C]=n(p.shape,u.shape,g,x,l),w=c.makeTensorInfo(C,l),k=c.texData.get(w.dataId);return k.values=b,w}let m=O().getBool("WEBGL_PACK_BINARY_OPERATIONS")&&e!=null,d;return m?d=new _o(e,p.shape,u.shape,t):d=new io(r,p.shape,u.shape),c.runWebGLProgram(d,[p,u],l)}}function Ha(r,e=!1){if(r==="linear")return e?BE:DE;if(r==="relu")return 
e?zE:OE;if(r==="elu")return e?VE:FE;if(r==="relu6")return e?WE:PE;if(r==="prelu")return e?Ow:Fw;if(r==="leakyrelu")return e?Dw:Rw;if(r==="sigmoid")return e?UE:ME;throw new Error(`Activation ${r} has not been implemented for the WebGL backend.`)}var fc=class{constructor(e,t,o,n=!1,s=!1,a=!1,i=null,p=!1,u=!1){this.variableNames=["matrixA","matrixB"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=o,this.enableShapeUniforms=lt(this.outputShape.length);let c=n?e[1]:e[2],l=Math.ceil(c/2),m=n?"i * 2, rc.y":"rc.y, i * 2",d=s?"rc.z, i * 2":"i * 2, rc.z",f=n?["a.xxyy","a.zzww"]:["a.xxzz","a.yyww"],h=s?["b.xzxz","b.ywyw"]:["b.xyxy","b.zwzw"],g="",x="";i&&(p?g=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getPreluActivationWeightsAtOutCoords();
${i}
}`:u?g=`vec4 activation(vec4 a) {
vec4 b = getLeakyreluAlphaAtOutCoords();
${i}
}`:g=`vec4 activation(vec4 x) {
${i}
2023-01-06 19:23:06 +01:00
}`,x="result = activation(result);");let b=a?"result += getBiasAtOutCoords();":"";a&&this.variableNames.push("bias"),p&&this.variableNames.push("preluActivationWeights"),u&&this.variableNames.push("leakyreluAlpha");let C="rc.x",w="rc.x";e[0]<t[0]?C=`imod(rc.x, ${e[0]})`:t[0]<e[0]&&(w=`imod(rc.x, ${t[0]})`),this.userCode=`
2022-11-18 17:13:29 +01:00
${g}
// Don't use uniform for sharedDimensionPacked for performance.
const float sharedDimension = ${l}.0;
vec4 dot2x2ARowBCol(ivec3 rc) {
vec4 result = vec4(0);
2023-01-06 19:23:06 +01:00
int batchA = ${C};
int batchB = ${w};
2022-11-18 17:13:29 +01:00
for (int i = 0; i < ${l}; i++) {
vec4 a = getMatrixA(batchA, ${m});
2022-11-20 22:20:02 +01:00
vec4 b = getMatrixB(batchB, ${d});
2022-11-18 17:13:29 +01:00
// These swizzled products need to be separately added.
// See: https://github.com/tensorflow/tfjs/issues/1735
2022-11-20 22:20:02 +01:00
result += (${f[0]} * ${h[0]});
result += (${f[1]} * ${h[1]});
2022-11-18 17:13:29 +01:00
}
return result;
}
void main() {
ivec3 rc = getOutputCoords();
vec4 result = dot2x2ARowBCol(rc);
${b}
2022-11-20 22:20:02 +01:00
${x}
2022-11-18 17:13:29 +01:00
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};var Pw={REAL:"return areal * breal - aimag * bimag;",IMAG:"return areal * bimag + aimag * breal;"},$l=class{constructor(e,t,o){this.variableNames=["AReal","AImag","BReal","BImag"],this.outputShape=S.assertAndGetBroadcastShape(t,o),this.userCode=`
2022-11-18 17:13:29 +01:00
float binaryOpComplex(
float areal, float aimag, float breal, float bimag) {
${e}
}
void main() {
float areal = getARealAtOutCoords();
float aimag = getAImagAtOutCoords();
float breal = getBRealAtOutCoords();
float bimag = getBImagAtOutCoords();
setOutput(binaryOpComplex(areal, aimag, breal, bimag));
}
2023-01-06 19:23:06 +01:00
`}};var XE="return a * b;";function El(r){let{inputs:e,backend:t}=r,{a:o,b:n}=e,s=S.upcastType(o.dtype,n.dtype);if(o.dtype==="complex64"){let i=t.texData.get(o.dataId),p=t.texData.get(n.dataId),u=new $l(Pw.REAL,o.shape,n.shape),c=new $l(Pw.IMAG,o.shape,n.shape),l=[{dataId:i.complexTensorInfos.real.dataId,dtype:i.complexTensorInfos.real.dtype,shape:o.shape},{dataId:i.complexTensorInfos.imag.dataId,dtype:i.complexTensorInfos.imag.dtype,shape:o.shape},{dataId:p.complexTensorInfos.real.dataId,dtype:p.complexTensorInfos.real.dtype,shape:n.shape},{dataId:p.complexTensorInfos.imag.dataId,dtype:p.complexTensorInfos.imag.dtype,shape:n.shape}],m=t.runWebGLProgram(u,l,"float32"),d=t.runWebGLProgram(c,l,"float32"),f=Ar({inputs:{real:m,imag:d},backend:t});return t.disposeIntermediateTensorInfo(m),t.disposeIntermediateTensorInfo(d),f}if(t.shouldExecuteOnCPU([o,n])){let i=t.texData.get(o.dataId),p=t.texData.get(n.dataId),[u,c]=aE(o.shape,n.shape,i.values,p.values,s),l=t.makeTensorInfo(c,s),m=t.texData.get(l.dataId);return m.values=u,l}let a;return O().getBool("WEBGL_PACK_BINARY_OPERATIONS")?a=new _o(XE,o.shape,n.shape):a=new io(XE,o.shape,n.shape),t.runWebGLProgram(a,[o,n],s)}var YE={kernelName:kn,backendName:"webgl",kernelFunc:El};function QE(r,e,t){let o=[Ua(r.shape),...Ga(r.shape)],n={dtype:r.dtype,shape:o,dataId:r.dataId},s=[Ua(e),...Ga(e)],a=new mc(s,o),i=!0,p=[o],u=t.runWebGLProgram(a,[n],r.dtype,p,i);return{dataId:u.dataId,shape:e,dtype:u.dtype}}function re(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{shape:s}=o,a=t,i=y.sizeFromShape(n.shape),p=y.inferFromImplicitShape(s,i),u=y.sizeFromShape(p);y.assert(i===u,()=>`The new shape (${p}) has ${u} elements and the old shape (${n.shape}) has ${i} elements. 
The new shape and old shape must have the same number of elements.`);let c=a.texData.get(n.dataId);return c.isPacked&&!Bi(n.shape,p)&&!(c.texture!==null&&Bi(c.shape,p))?QE(n,p,a):(a.incRef(n.dataId),{dataId:n.dataId,shape:p,dtype:n.dtype})}var ZE={kernelName:_s,backendName:"webgl",kernelFunc:re};var Al=class{constructor(e,t){this.variableNames=["x"];let{windowSize:o,batchSize:n,inSize:s,outSize:a}=e;this.outputShape=[n,a];let i=Math.floor(o/4)*4,p=o%4,u="sumValue += dot(values, ones);";if(t!=null){let l=1/t;u=`sumValue += dot(values * ${y.isInt(l)?l.toPrecision(2):l}, ones);`}let c="";s%o>0&&(c=`
2022-11-18 17:13:29 +01:00
if (inIdx < 0 || inIdx >= ${s}) {
return 0.0;
}
`),this.userCode=`
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float getValue(int batch, int inIdx) {
${c}
return getX(batch, inIdx);
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${o};
float sumValue = 0.0;
for (int i = 0; i < ${i}; i += 4) {
int inIdx = inOffset + i;
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
getValue(batch, inIdx + 3)
);
${u}
}
int inIdx = inOffset + ${i};
if (${p===1}) {
vec4 values = vec4(getValue(batch, inIdx), 0.0, 0.0, 0.0);
${u}
} else if (${p===2}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1), 0.0, 0.0);
${u}
} else if (${p===3}) {
vec4 values = vec4(
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2), 0.0);
${u}
}
setOutput(sumValue);
}
2022-11-20 22:20:02 +01:00
`}};var Jf=class{constructor(e,t){this.variableNames=["x"];let{windowSize:o,batchSize:n,inSize:s,outSize:a}=e;this.outputShape=[n,a];let i="0.0",p="";t==="prod"?i="1.0":t==="min"?(i="1.0 / 1e-20",p="min"):t==="max"&&(i="-1.0 / 1e-20",p="max");let u=`${t}(${t}(${t}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;t==="sum"?u="sumValue":t==="prod"?u="prodValue":t==="all"?u="allValue":t==="any"&&(u="anyValue");let c=Math.floor(o/4)*4,l=o%4,m=`
2022-11-18 17:13:29 +01:00
if (${t==="sum"}) {
sumValue += dot(values, ones);
} else if (${t==="prod"}) {
vec2 tmp = vec2(values[0], values[1]) * vec2(values[2], values[3]);
prodValue *= tmp[0] * tmp[1];
} else {
minMaxValue = ${p}(values, minMaxValue);
if (${t==="min"} || ${t==="max"}) {
minMaxValue = ${p}(values, minMaxValue);
bvec4 isNaN = isnan(values);
if (isNaN.r || isNaN.g || isNaN.b || isNaN.a) {
minMaxValue = vec4(NAN);
}
}
}
2022-11-20 22:20:02 +01:00
`,d="vec4";t==="all"?(i="1.0",m=`
2022-11-18 17:13:29 +01:00
bool reducedAllValue = all(values);
float floatedReducedAllValue = float(reducedAllValue);
allValue = float(allValue >= 1.0 && floatedReducedAllValue >= 1.0);
2022-11-20 22:20:02 +01:00
`,d="bvec4"):t==="any"&&(i="0.0",m=`
2022-11-18 17:13:29 +01:00
bool reducedAnyValue = any(values);
float floatedReducedAnyValue = float(reducedAnyValue);
anyValue = float(anyValue >= 1.0 || floatedReducedAnyValue >= 1.0);
2022-11-20 22:20:02 +01:00
`,d="bvec4");let f="";s%o>0&&(f=`
2022-11-18 17:13:29 +01:00
if (inIdx < 0 || inIdx >= ${s}) {
return initializationValue;
}
`),this.userCode=`
const float initializationValue = ${i};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float getValue(int batch, int inIdx) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
return getX(batch, inIdx);
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${o};
vec4 minMaxValue = vec4(${i});
float prodValue = 1.0;
float sumValue = 0.0;
float allValue = 1.0;
float anyValue = 0.0;
for (int i = 0; i < ${c}; i += 4) {
int inIdx = inOffset + i;
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
getValue(batch, inIdx + 3)
);
${m}
}
int inIdx = inOffset + ${c};
if (${l===1}) {
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
initializationValue,
initializationValue,
initializationValue
);
${m}
} else if (${l===2}) {
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
initializationValue,
initializationValue
);
${m}
} else if (${l===3}) {
2022-11-20 22:20:02 +01:00
${d} values = ${d}(
2022-11-18 17:13:29 +01:00
getValue(batch, inIdx),
getValue(batch, inIdx + 1),
getValue(batch, inIdx + 2),
initializationValue
);
${m}
}
setOutput(${u});
}
2023-01-06 19:23:06 +01:00
`}};function uY(r){let e=[];for(;e.length===0||e[e.length-1].outSize!==1;){let t=e.length?e[e.length-1].outSize:r[1],o=S.computeOptimalWindowSize(t);e.push({inSize:t,windowSize:o,outSize:Math.ceil(t/o)})}return e}function Ur(r,e,t,o){let n=uY(r.shape),s=r;for(let a=0;a<n.length;a++){let{inSize:i,windowSize:p,outSize:u}=n[a],c,l;t==="mean"?c=a===0?new Al({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u},i):new Al({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u}):c=new Jf({windowSize:p,inSize:i,batchSize:r.shape[0],outSize:u},t),l=s,s=o.runWebGLProgram(c,[s],e),l.dataId!==r.dataId&&o.disposeIntermediateTensorInfo(l)}return s}var eh=class{constructor(e,t){this.variableNames=["A"];let o=new Array(e.length);for(let a=0;a<o.length;a++)o[a]=e[t[a]];this.outputShape=o,this.rank=o.length;let n=$e(this.rank),s=pY(t);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${n} resRC = getOutputCoords();
setOutput(getA(${s}));
}
2023-01-06 19:23:06 +01:00
`}};function pY(r){let e=r.length;if(e>6)throw Error(`Transpose for rank ${e} is not yet supported`);let t=["resRC.x","resRC.y","resRC.z","resRC.w","resRC.u","resRC.v"],o=new Array(e);for(let n=0;n<r.length;n++)o[r[n]]=t[n];return o.join()}var th=class{constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0;let o=new Array(e.length);for(let c=0;c<o.length;c++)o[c]=e[t[c]];if(this.outputShape=o,this.rank=o.length,this.rank>6)throw Error(`Packed transpose for rank ${this.rank} is not yet supported.`);let n=$e(this.rank),s=Ew("rc",this.rank),a=new Array(this.rank);for(let c=0;c<t.length;c++)a[t[c]]=s[c];let i=`vec2(${a.slice(-2).join()})`,p=`++${s[this.rank-1]} < ${o[this.rank-1]}`,u=`getChannel(getA(${a.join()}), ${i})`;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${n} rc = getOutputCoords();
vec4 result = vec4(0.);
result[0] = ${u};
if(${p}) {
result[1] = ${u};
}
--${s[this.rank-1]};
if(++${s[this.rank-2]} < ${o[this.rank-2]}) {
result[2] = ${u};
if(${p}) {
result[3] = ${u};
}
}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};function zi(r,e,t){let o=O().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new th(r.shape,e):new eh(r.shape,e);return t.runWebGLProgram(o,[r],r.dtype)}function JE(r,e,t,o){let n=e,s=r.shape.length,a=y.parseAxisParam(n,r.shape),i=a,p=S.getAxesPermutation(i,s),u=p!=null,c=r;u&&(c=zi(r,p,o),i=S.getInnerMostAxes(i.length,s)),S.assertAxesAreInnerMostDims("sum",i,s);let[l,m]=S.computeOutAndReduceShapes(c.shape,i),d=l;t&&(d=S.expandShapeToKeepDim(l,a));let f=y.sizeFromShape(m),g=y.sizeFromShape(r.shape)/f,x=re({inputs:{x:c},attrs:{shape:[g,f]},backend:o}),b=Ta(r.dtype),C=Ur(x,b,"sum",o),w=re({inputs:{x:C},attrs:{shape:d},backend:o});return o.disposeIntermediateTensorInfo(x),o.disposeIntermediateTensorInfo(C),u&&o.disposeIntermediateTensorInfo(c),w}function Vu(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return JE(n,s,a,t)}var eA={kernelName:Kn,backendName:"webgl",kernelFunc:Vu};function yt(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{perm:s}=o,a=t,i=n.shape.length,p=new Array(i);for(let c=0;c<p.length;c++)p[c]=n.shape[s[c]];let u;if(a.shouldExecuteOnCPU([n])){let l=a.texData.get(n.dataId).values,m=Bu(l,n.shape,n.dtype,s,p);u=a.makeTensorInfo(p,n.dtype);let d=a.texData.get(u.dataId);d.values=m}else u=zi(n,s,a);return u}var tA={kernelName:ro,backendName:"webgl",kernelFunc:yt};var Mw=1e3;function zu({a:r,b:e,transposeA:t,transposeB:o,backend:n,bias:s=null,preluActivationWeights:a=null,leakyreluAlpha:i=0,activation:p=null}){let u=r.shape.length,c=e.shape.length,l=t?r.shape[u-2]:r.shape[u-1],m=o?e.shape[c-1]:e.shape[c-2],d=t?r.shape[u-1]:r.shape[u-2],f=o?e.shape[c-2]:e.shape[c-1],h=r.shape.slice(0,-2),g=e.shape.slice(0,-2),x=y.sizeFromShape(h),b=y.sizeFromShape(g),w=yr.assertAndGetBroadcastShape(r.shape.slice(0,-2),e.shape.slice(0,-2)).concat([d,f]);y.assert(l===m,()=>`Error in matMul: inner shapes (${l}) and (${m}) of Tensors with shapes ${r.shape} and ${e.shape} and transposeA=${t} and transposeB=${o} must match.`);let 
k=t?[x,l,d]:[x,d,l],_=o?[b,f,m]:[b,m,f],E=re({inputs:{x:r},backend:n,attrs:{shape:k}}),A=re({inputs:{x:e},backend:n,attrs:{shape:_}}),R=[E,A],D=Math.max(x,b),P=t?E.shape[1]:E.shape[2],M=s!=null,L=a!=null,V=p==="leakyrelu",z=p!=null?Ha(p,!0):null,U=M||L||V||z!=null,K;if((d===1||f===1)&&P>Mw&&U===!1){let q=E,Z=A;t&&(q=yt({inputs:{x:E},backend:n,attrs:{perm:[0,2,1]}}),R.push(q)),o&&(Z=yt({inputs:{x:A},backend:n,attrs:{perm:[0,2,1]}}),R.push(Z));let ee=f!==1,oe=f===1,J=q;ee&&(J=re({inputs:{x:q},backend:n,attrs:{shape:[D,P,1]}}),R.push(J));let te=f===1?2:1,ie=Z;oe&&(ie=re({inputs:{x:Z},backend:n,attrs:{shape:[D,1,P]}}),R.push(ie));let ce=El({inputs:{a:J,b:ie},backend:n});K=Vu({inputs:{x:ce},backend:n,attrs:{axis:te,keepDims:!0}}),R.push(ce)}else{let q=dt(r.dtype,e.dtype),Z=new fc(k,_,[D,d,f],t,o,M,z,L,V),ee=[E,A];if(s!=null&&ee.push(s),L&&ee.push(a),V){let oe=n.makeTensorInfo([],"float32",y.createScalarValue(i,"float32"));ee.push(oe),R.push(oe)}K=n.runWebGLProgram(Z,ee,q)}let H=re({inputs:{x:K},backend:n,attrs:{shape:w}});R.push(K);for(let q of R)n.disposeIntermediateTensorInfo(q);return H}function cY(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=e,{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o;return zu({a:n,b:s,transposeA:p,transposeB:u,backend:t,bias:a,preluActivationWeights:i,leakyreluAlpha:l,activation:c})}var rA={kernelName:ho,backendName:"webgl",kernelFunc:cY};var oA="return abs(x);";function lY(r){let{inputs:e,backend:t}=r,{x:o}=e;if(t.shouldExecuteOnCPU([o])&&o.dtype!=="complex64"){let s=t.texData.get(o.dataId),a=qf(s.values);return t.makeTensorInfo(o.shape,o.dtype,a)}let n;return O().getBool("WEBGL_PACK_UNARY_OPERATIONS")?n=new Er(o.shape,oA):n=new Jt(o.shape,oA),t.runWebGLProgram(n,[o],o.dtype)}var nA={kernelName:ys,backendName:"webgl",kernelFunc:lY};var mY=zt+`
2022-11-18 17:13:29 +01:00
if (abs(x) > 1.) {
return NAN;
}
return acos(x);
2023-01-06 19:23:06 +01:00
`,dY=he({opSnippet:mY}),sA={kernelName:aa,backendName:"webgl",kernelFunc:dY};var fY=zt+`
2022-11-18 17:13:29 +01:00
if (x < 1.0) return NAN;
2023-01-06 19:23:06 +01:00
return log(x + sqrt(x * x - 1.0));`,hY=he({opSnippet:fY}),aA={kernelName:ia,backendName:"webgl",kernelFunc:hY};var iA="return a + b;",gY=tt({opSnippet:iA,packedOpSnippet:iA,supportsComplex:!0,cpuKernelImpl:V$}),uA={kernelName:eo,backendName:"webgl",kernelFunc:gY};var rh=class{constructor(e,t){this.outputShape=[],this.outputShape=e,this.variableNames=t.map((s,a)=>`T${a}`);let o=[];this.variableNames.forEach(s=>{o.push(`float v${s} = get${s}AtOutCoords();`)});let n=this.variableNames.map(s=>`v${s}`).join(" + ");this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o.join(`
`)}
float result = ${n};
setOutput(result);
}
2022-11-20 22:20:02 +01:00
`}};var oh=class{constructor(e,t){this.outputShape=[],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=e,this.variableNames=t.map((s,a)=>`T${a}`);let o=[];this.variableNames.forEach(s=>{o.push(`vec4 v${s} = get${s}AtOutCoords();`)});let n=this.variableNames.map(s=>`v${s}`).join(" + ");this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o.join(`
`)}
vec4 result = ${n};
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};function nh(r){let{inputs:e,backend:t}=r,o=e;if(o.length===1)return At({inputs:{x:o[0]},backend:t});if(o.length>O().get("WEBGL_MAX_TEXTURES_IN_SHADER")){let p=Math.floor(o.length/2),u=nh({inputs:o.slice(0,p),backend:t}),c=nh({inputs:o.slice(p),backend:t});return nh({inputs:[u,c],backend:t})}let n=o.map(p=>p.dtype).reduce((p,u)=>dt(p,u)),s=o.map(p=>p.shape),i=O().getBool("WEBGL_PACK")?new oh(o[0].shape,s):new rh(o[0].shape,s);return t.runWebGLProgram(i,o,n)}var pA={kernelName:Po,backendName:"webgl",kernelFunc:nh};function xY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=S.getAxesPermutation(u,i),l=n;c!=null&&(l=yt({inputs:{x:n},backend:t,attrs:{perm:c}}),u=S.getInnerMostAxes(u.length,i)),S.assertAxesAreInnerMostDims("all",u,i);let[m,d]=S.computeOutAndReduceShapes(l.shape,u),f=y.sizeFromShape(d),h=re({inputs:{x:l},backend:t,attrs:{shape:[-1,f]}}),g=Ur(h,h.dtype,"all",t),x;if(a){let b=S.expandShapeToKeepDim(m,p);x=re({inputs:{x:g},backend:t,attrs:{shape:b}})}else x=re({inputs:{x:g},backend:t,attrs:{shape:m}});return t.disposeIntermediateTensorInfo(h),t.disposeIntermediateTensorInfo(g),c!=null&&t.disposeIntermediateTensorInfo(l),x}var cA={kernelName:Mo,backendName:"webgl",kernelFunc:xY};function yY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=S.getAxesPermutation(u,i),l=n;c!=null&&(l=yt({inputs:{x:n},backend:t,attrs:{perm:c}}),u=S.getInnerMostAxes(u.length,i)),S.assertAxesAreInnerMostDims("any",u,i);let[m,d]=S.computeOutAndReduceShapes(l.shape,u),f=y.sizeFromShape(d),h=re({inputs:{x:l},backend:t,attrs:{shape:[-1,f]}}),g=Ur(h,h.dtype,"any",t),x;if(a){let b=S.expandShapeToKeepDim(m,p);x=re({inputs:{x:g},backend:t,attrs:{shape:b}})}else x=re({inputs:{x:g},backend:t,attrs:{shape:m}});return t.disposeIntermediateTensorInfo(h),t.disposeIntermediateTensorInfo(g),c!=null&&t.disposeIntermediateTensorInfo(l),x}var 
lA={kernelName:Lo,backendName:"webgl",kernelFunc:yY};var sh=class{constructor(e,t,o){this.variableNames=["A"];let{windowSize:n,batchSize:s,outSize:a}=e;o||this.variableNames.push("bestIndicesA"),this.outputShape=[s,a];let i=t==="max"?">":"<",p=o?"inOffset + i;":"round(getBestIndicesA(batch, inOffset + i));";this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int outIdx = coords[1];
int inOffset = outIdx * ${n};
int bestIndex = inOffset;
float bestValue = getA(batch, bestIndex);
for (int i = 0; i < ${n}; i++) {
int inIdx = ${p};
float candidate = getA(batch, inIdx);
if (candidate ${i} bestValue) {
bestValue = candidate;
bestIndex = inIdx;
}
}
setOutput(float(bestIndex));
}
2023-01-06 19:23:06 +01:00
`}};var ah=class{constructor(e,t,o,n){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,y.assert(e.length>2,()=>`Packed arg${o.charAt(0).toUpperCase()+o.slice(1)} supports only inputs with rank above 2.`);let s=e[e.length-1],a=Math.ceil(s/t);this.outputShape=e.slice(0,-1),a>1&&this.outputShape.push(a),n||this.variableNames.push("bestIndicesA");let i=this.outputShape,p=i.length,u=$e(p),c=Et("coords",p),l,m;if(a===1){m=p+1;let A=$e(m);l=`
2022-11-20 22:20:02 +01:00
${A} sourceLocR = ${A}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
++${c[p-1]};
2022-11-20 22:20:02 +01:00
${A} sourceLocG = ${A}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
++${c[p-2]};
2022-11-20 22:20:02 +01:00
${A} sourceLocA = ${A}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
--${c[p-1]};
2022-11-20 22:20:02 +01:00
${A} sourceLocB = ${A}(${c.join()}, 0);
2022-11-18 17:13:29 +01:00
--${c[p-2]};`}else m=p,l=`
${u} sourceLocR = coords;
++${c[p-1]};
${u} sourceLocG = coords;
++${c[p-2]};
${u} sourceLocA = coords;
--${c[p-1]};
${u} sourceLocB = coords;
2023-01-06 19:23:06 +01:00
--${c[p-2]};`;let d=["x","y","z","w","u","v"].slice(0,m),f="."+d[m-1],h=d.map(A=>"int "+A),g=Et("sourceLocR",m-1).concat("inIdx.r"),x=Et("sourceLocG",m-1).concat("inIdx.g"),b=Et("sourceLocB",m-1).concat("inIdx.b"),C=Et("sourceLocA",m-1).concat("inIdx.a"),w=o==="max"?"greaterThan":"lessThan",k=n?"":`
2022-11-18 17:13:29 +01:00
inIdx = round(vec4(getBestIndicesAChannel(${g.join()}),
2022-11-20 22:20:02 +01:00
getBestIndicesAChannel(${x.join()}),
2022-11-18 17:13:29 +01:00
getBestIndicesAChannel(${b.join()}),
getBestIndicesAChannel(${C.join()})));`,_=`vec4(
getAChannel(${g.join()}),
2022-11-20 22:20:02 +01:00
hasNextCol ? getAChannel(${x.join()}) : 0.,
2022-11-18 17:13:29 +01:00
hasNextRow ? getAChannel(${b.join()}) : 0.,
2023-01-06 19:23:06 +01:00
hasNextRow && hasNextCol ? getAChannel(${C.join()}) : 0.)`,E=n?"":`
2022-11-18 17:13:29 +01:00
float getBestIndicesAChannel(${h.join()}) {
2022-11-20 22:20:02 +01:00
return getChannel(getBestIndicesA(${d.join()}),
vec2(${d.slice(-2).join()}));
2022-11-18 17:13:29 +01:00
}`;this.userCode=`
float getAChannel(${h.join()}) {
2022-11-20 22:20:02 +01:00
return getChannel(getA(${d.join()}),
vec2(${d.slice(-2).join()}));
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
void main() {
${u} coords = getOutputCoords();
bool hasNextCol = ${c[p-1]} < ${i[p-1]-1};
bool hasNextRow = ${c[p-2]} < ${i[p-2]-1};
${l}
2022-11-20 22:20:02 +01:00
ivec4 srcIdx = ivec4(sourceLocR${f}, sourceLocG${f},
sourceLocB${f}, sourceLocA${f}) * ${t};
2022-11-18 17:13:29 +01:00
ivec4 inIdx = srcIdx;
vec4 bestIndex = vec4(inIdx);
vec4 bestValue = ${_};
for (int i = 0; i < ${t}; i++) {
inIdx = srcIdx;
${k}
vec4 candidate = ${_};
bvec4 nan = isnan(candidate);
bvec4 replace = bvec4(
vec4(${w}(candidate, bestValue)) * (vec4(1.0) - vec4(nan)));
bestValue = vec4(replace.x ? candidate.x : bestValue.x,
replace.y ? candidate.y : bestValue.y,
replace.z ? candidate.z : bestValue.z,
replace.w ? candidate.w : bestValue.w);
bestIndex = mix(bestIndex, vec4(inIdx), vec4(replace));
srcIdx++;
}
setOutput(bestIndex);
}
2023-01-06 19:23:06 +01:00
`}};function mA(r,e,t,o=null){let n=e.shape[0],s=e.shape[1];o!=null&&(n=o.shape[0],s=o.shape[1]);let a=S.computeOptimalWindowSize(s),i={windowSize:a,inSize:s,batchSize:n,outSize:Math.ceil(s/a)},p=new sh(i,t,o==null),u=[e];o!=null&&u.push(o);let c=r.runWebGLProgram(p,u,"int32");if(c.shape[1]===1)return c;let l=mA(r,e,t,c);return r.disposeIntermediateTensorInfo(c),l}function dA(r,e,t,o=null){let n=o!=null?o.shape:e.shape,s=n[n.length-1],a=S.computeOptimalWindowSize(s),i=new ah(n,a,t,o==null),p=o==null?[e]:[e,o],u=r.runWebGLProgram(i,p,"int32");if(u.shape.length===e.shape.length){let c=dA(r,e,t,u);return r.disposeIntermediateTensorInfo(u),c}return u}function ih(r,e,t,o){let n=[t];if(S.assertAxesAreInnerMostDims("arg"+o.charAt(0).toUpperCase()+o.slice(1),n,e.shape.length),!O().getBool("WEBGL_PACK_REDUCE")||e.shape.length<=2){let s=[],a=r.texData.get(e.dataId),i=a!==null&&a.isPacked,p=e;i&&(p=r.unpackTensor(e),s.push(p));let[u,c]=S.computeOutAndReduceShapes(p.shape,n),l=y.sizeFromShape(c),m=re({inputs:{x:p},backend:r,attrs:{shape:[-1,l]}});s.push(m);let d=mA(r,m,o);s.push(d);let f=re({inputs:{x:d},backend:r,attrs:{shape:u}});return s.forEach(h=>r.disposeIntermediateTensorInfo(h)),f}return dA(r,e,o)}function bY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=S.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=yt({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=S.getInnerMostAxes(a.length,p.shape.length)),S.assertAxesAreInnerMostDims("argMax",[a[0]],p.shape.length);let c=ih(t,p,a[0],"max");return u.forEach(l=>t.disposeIntermediateTensorInfo(l)),c}var fA={kernelName:Bo,backendName:"webgl",kernelFunc:bY};function 
CY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=S.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=yt({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=S.getInnerMostAxes(a.length,p.shape.length)),S.assertAxesAreInnerMostDims("argMin",[a[0]],p.shape.length);let c=ih(t,p,a[0],"min");return u.forEach(l=>t.disposeIntermediateTensorInfo(l)),c}var hA={kernelName:ei,backendName:"webgl",kernelFunc:CY};var SY=zt+`
2022-11-18 17:13:29 +01:00
if (abs(x) > 1.) {
return NAN;
}
return asin(x);
2023-01-06 19:23:06 +01:00
`,wY=he({opSnippet:SY}),gA={kernelName:ua,backendName:"webgl",kernelFunc:wY};var IY=zt+"return log(x + sqrt(x * x + 1.0));",vY=he({opSnippet:IY}),xA={kernelName:pa,backendName:"webgl",kernelFunc:vY};var kY=zt+`
2022-11-18 17:13:29 +01:00
return atan(x);
2023-01-06 19:23:06 +01:00
`,NY=he({opSnippet:kY}),yA={kernelName:ca,backendName:"webgl",kernelFunc:NY};var TY=dc+`
2022-11-18 17:13:29 +01:00
return atan(a, b);
2023-01-06 19:23:06 +01:00
`,_Y=`
2022-11-18 17:13:29 +01:00
vec4 result = atan(a, b);
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
2023-01-06 19:23:06 +01:00
`+Js+`
2022-11-18 17:13:29 +01:00
return result;
2023-01-06 19:23:06 +01:00
`,$Y=tt({opSnippet:TY,packedOpSnippet:_Y}),bA={kernelName:ma,backendName:"webgl",kernelFunc:$Y};var EY=zt+`
2022-11-18 17:13:29 +01:00
if ((x < -1.0) || (x > 1.0)) return NAN;
2023-01-06 19:23:06 +01:00
return (log(1.0 + x) - log(1.0 - x)) / 2.0;`,AY=he({opSnippet:EY}),CA={kernelName:la,backendName:"webgl",kernelFunc:AY};var cs=class{constructor(e,t,o,n=!1,s=!1){if(this.variableNames=["x"],t==="avg"&&o)throw new Error("Cannot compute positions for average pool.");let a=e.filterWidth,i=e.strideHeight,p=e.strideWidth,u=e.dilationHeight,c=e.dilationWidth,l=e.effectiveFilterHeight,m=e.effectiveFilterWidth,d=e.padInfo.top,f=e.padInfo.left;this.outputShape=e.outShape;let h=t==="avg",g=`((batch * ${e.inHeight} + xR) * ${e.inWidth} + xC) * ${e.inChannels} + d`,x=`(xR * ${e.inWidth} + xC) * ${e.inChannels} + d`,b="0.0";if(h||(b="-1.0 / 1e-20"),o){let A=">=";this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 strides = ivec2(${i}, ${p});
2022-11-20 22:20:02 +01:00
const ivec2 pads = ivec2(${d}, ${f});
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d = coords[3];
ivec2 xRCCorner = coords.yz * strides - pads;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// max/min x(?, ?, d) to get y(yR, yC, d).
// ? = to be determined
float minMaxValue = 0.0;
float minMaxValueFound = 0.0;
int minMaxPosition = 0;
float avgValue = 0.0;
for (int wR = 0; wR < ${l};
wR += ${u}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${m};
wC += ${c}) {
int xC = xCCorner + wC;
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float value = getX(batch, xR, xC, d);
// If a min / max value has already been found, use it. If not,
// use the current value.
float currMinMaxValue = mix(
value, minMaxValue, minMaxValueFound);
2022-11-20 22:20:02 +01:00
if (value ${A} currMinMaxValue) {
2022-11-18 17:13:29 +01:00
minMaxValue = value;
minMaxValueFound = 1.0;
2022-11-20 22:20:02 +01:00
minMaxPosition = ${n?s?g:x:`wR * ${m} + wC`};
2022-11-18 17:13:29 +01:00
}
}
}
setOutput(float(minMaxPosition));
}
2023-01-06 19:23:06 +01:00
`;return}let C="max",w=`${t}(${t}(${t}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;t==="avg"&&(w="avgValue / max(count, 1.0)");let k=Math.floor(a/4)*4,_=a%4,E=`
2022-11-18 17:13:29 +01:00
if (${h}) {
avgValue += dot(values, ones);
} else {
minMaxValue = ${C}(values, minMaxValue);
}
`;this.userCode=`
const ivec2 strides = ivec2(${i}, ${p});
2022-11-20 22:20:02 +01:00
const ivec2 pads = ivec2(${d}, ${f});
2022-11-18 17:13:29 +01:00
const float initializationValue = ${b};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float count = 0.0;
float getValue(int batch, int xR, int xC, int d) {
if (xC < 0 || xC >= ${e.inWidth}) {
return initializationValue;
}
count += 1.0;
return getX(batch, xR, xC, d);
}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d = coords[3];
ivec2 xRCCorner = coords.yz * strides - pads;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// max/min x(?, ?, d) to get y(yR, yC, d).
// ? = to be determined
vec4 minMaxValue = vec4(${b});
float avgValue = 0.0;
count = 0.0;
for (int wR = 0; wR < ${l};
wR += ${u}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${k}; wC += 4) {
int xC = xCCorner + wC * ${c};
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
getValue(batch, xR, xC + 2 * ${c}, d),
getValue(batch, xR, xC + 3 * ${c}, d)
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
}
int xC = xCCorner + ${k};
if (${_===1}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
initializationValue,
initializationValue,
initializationValue
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
} else if (${_===2}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
initializationValue,
initializationValue
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
} else if (${_===3}) {
vec4 values = vec4(
getValue(batch, xR, xC, d),
getValue(batch, xR, xC + ${c}, d),
getValue(batch, xR, xC + 2 * ${c}, d),
initializationValue
);
2023-01-06 19:23:06 +01:00
${E}
2022-11-18 17:13:29 +01:00
}
}
setOutput(${w});
}
2023-01-06 19:23:06 +01:00
`}},Wi=class{constructor(e,t,o,n=!1,s=!1){if(this.variableNames=["x"],t==="avg"&&o)throw new Error("Cannot compute positions for average pool.");let a=e.filterWidth,i=e.strideDepth,p=e.strideHeight,u=e.strideWidth,c=e.dilationDepth,l=e.dilationHeight,m=e.dilationWidth,d=e.effectiveFilterDepth,f=e.effectiveFilterHeight,h=e.effectiveFilterWidth,g=e.padInfo.front,x=e.padInfo.top,b=e.padInfo.left;this.outputShape=e.outShape;let C=t==="avg",w="0.0";if(C||(w="-1.0 / 1e-20"),o){let D=">=";this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec3 strides =
ivec3(${i}, ${p}, ${u});
2022-11-20 22:20:02 +01:00
const ivec3 pads = ivec3(${g}, ${x}, ${b});
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 xCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xDCorner = xCorner.x;
int xRCorner = xCorner.y;
int xCCorner = xCorner.z;
// max/min x(?, ?, ?, ch) to get y(yD, yR, yC, ch).
// ? = to be determined
float minMaxValue = 0.0;
float minMaxValueFound = 0.0;
int minMaxPosition = 0;
2022-11-20 22:20:02 +01:00
for (int wD = 0; wD < ${d};
2022-11-18 17:13:29 +01:00
wD += ${c}) {
int xD = xDCorner + wD;
if (xD < 0 || xD >= ${e.inDepth}) {
continue;
}
2022-11-20 22:20:02 +01:00
for (int wR = 0; wR < ${f};
2022-11-18 17:13:29 +01:00
wR += ${l}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int wC = 0; wC < ${h};
wC += ${m}) {
int xC = xCCorner + wC;
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float value = getX(batch, xD, xR, xC, ch);
// If a min / max value has already been found, use it. If not,
// use the current value.
float currMinMaxValue = mix(
value, minMaxValue, minMaxValueFound);
if (value ${D} currMinMaxValue) {
minMaxValue = value;
minMaxValueFound = 1.0;
2022-11-20 22:20:02 +01:00
minMaxPosition = ${n?s?`(((batch * ${e.inDepth} + xD) * ${e.inHeight} + xR) * ${e.inWidth} + xC) * ${e.inChannels} + ch`:`((xD * ${e.inHeight} + xR) * ${e.inWidth} + xC) * ${e.inChannels} + ch`:`wD * ${f} * ${h} +
2022-11-18 17:13:29 +01:00
wR * ${h} + wC`};
}
}
}
}
setOutput(float(minMaxPosition));
}
2023-01-06 19:23:06 +01:00
`;return}let k="max",_=`${t}(${t}(${t}(minMaxValue[0], minMaxValue[1]), minMaxValue[2]), minMaxValue[3])`;t==="avg"&&(_="avgValue / max(count, 1.0)");let E=Math.floor(a/4)*4,A=a%4,R=`
2022-11-18 17:13:29 +01:00
if (${C}) {
avgValue += dot(values, ones);
} else {
minMaxValue = ${k}(values, minMaxValue);
}
`;this.userCode=`
const ivec3 strides =
ivec3(${i}, ${p}, ${u});
2022-11-20 22:20:02 +01:00
const ivec3 pads = ivec3(${g}, ${x}, ${b});
2022-11-18 17:13:29 +01:00
const float initializationValue = ${w};
const vec4 ones = vec4(1.0, 1.0, 1.0, 1.0);
float count = 0.0;
float getValue(int batch, int xD, int xR, int xC, int ch) {
if (xC < 0 || xC >= ${e.inWidth}) {
return initializationValue;
}
count += 1.0;
return getX(batch, xD, xR, xC, ch);
}
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 xCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xDCorner = xCorner.x;
int xRCorner = xCorner.y;
int xCCorner = xCorner.z;
// max/min x(?, ?, ?, d) to get y(yD, yR, yC, ch).
// ? = to be determined
vec4 minMaxValue = vec4(${w});
float avgValue = 0.0;
count = 0.0;
2022-11-20 22:20:02 +01:00
for (int wD = 0; wD < ${d};
2022-11-18 17:13:29 +01:00
wD += ${c}) {
int xD = xDCorner + wD;
if (xD < 0 || xD >= ${e.inDepth}) {
continue;
}
2022-11-20 22:20:02 +01:00
for (int wR = 0; wR < ${f};
2022-11-18 17:13:29 +01:00
wR += ${l}) {
int xR = xRCorner + wR;
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
2023-01-06 19:23:06 +01:00
for (int wC = 0; wC < ${E}; wC += 4) {
2022-11-18 17:13:29 +01:00
int xC = xCCorner + wC * ${m};
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
getValue(batch, xD, xR, xC + 2 * ${m}, ch),
getValue(batch, xD, xR, xC + 3 * ${m}, ch)
);
2022-11-20 22:20:02 +01:00
${R}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
int xC = xCCorner + ${E};
2022-11-20 22:20:02 +01:00
if (${A===1}) {
2022-11-18 17:13:29 +01:00
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
initializationValue,
initializationValue,
initializationValue
);
2022-11-20 22:20:02 +01:00
${R}
} else if (${A===2}) {
2022-11-18 17:13:29 +01:00
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
initializationValue,
initializationValue
);
2022-11-20 22:20:02 +01:00
${R}
} else if (${A===3}) {
2022-11-18 17:13:29 +01:00
vec4 values = vec4(
getValue(batch, xD, xR, xC, ch),
getValue(batch, xD, xR, xC + ${m}, ch),
getValue(batch, xD, xR, xC + 2 * ${m}, ch),
initializationValue
);
2022-11-20 22:20:02 +01:00
${R}
2022-11-18 17:13:29 +01:00
}
}
}
2023-01-06 19:23:06 +01:00
setOutput(${_});
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
`}};function RY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e;us(n,"avgPool");let{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1;y.assert(S.eitherStridesOrDilationsAreOne(a,u),()=>`Error in avgPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${u}'`);let c=S.computePool2DInfo(n.shape,s,a,u,i,p);if(c.filterWidth===1&&c.filterHeight===1&&y.arraysEqual(c.inShape,c.outShape))return At({inputs:{x:n},backend:t});let l=new cs(c,"avg",!1);return t.runWebGLProgram(l,[n],"float32")}var SA={kernelName:Vo,backendName:"webgl",kernelFunc:RY};function DY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{filterSize:s,strides:a,pad:i,dimRoundingMode:p,dataFormat:u}=o,c=[1,1,1],l=S.computePool3DInfo(n.shape,s,a,c,i,p,u),m=new Wi(l,"avg",!1);return t.runWebGLProgram(m,[n],"float32")}var wA={kernelName:dp,backendName:"webgl",kernelFunc:DY};var uh=class{constructor(e){this.variableNames=["dy"],this.outputShape=e.inShape;let t=e.filterHeight,o=e.filterWidth,n=e.strideHeight,s=e.strideWidth,a=e.dilationHeight,i=e.dilationWidth,p=e.effectiveFilterHeight,u=e.effectiveFilterWidth,c=p-1-e.padInfo.top,l=u-1-e.padInfo.left,m=1/(t*o);this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${c}, ${l});
const float avgMultiplier = float(${m});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 dyRCCorner = coords.yz - pads;
int dyRCorner = dyRCCorner.x;
int dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${p};
wR += ${a}) {
float dyR = float(dyRCorner + wR) / ${n}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${u};
wC+= ${i}) {
float dyC = float(dyCCorner + wC) / ${s}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(b, idyR, idyC, d);
dotProd += dyValue * avgMultiplier;
}
}
setOutput(dotProd);
}
2022-11-20 22:20:02 +01:00
`}},ph=class{constructor(e){this.variableNames=["dy"],this.outputShape=e.inShape;let t=e.filterDepth,o=e.filterHeight,n=e.filterWidth,s=e.strideDepth,a=e.strideHeight,i=e.strideWidth,p=e.dilationDepth,u=e.dilationHeight,c=e.dilationWidth,l=e.effectiveFilterDepth,m=e.effectiveFilterHeight,d=e.effectiveFilterWidth,f=l-1-e.padInfo.front,h=m-1-e.padInfo.top,g=d-1-e.padInfo.left,x=1/(t*o*n);this.userCode=`
const ivec3 pads = ivec3(${f}, ${h}, ${g});
const float avgMultiplier = float(${x});
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyDCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
// Convolve dy(?, ?, ?, d) with pos mask(:, :, :, ch) to get
// dx(xD, xR, xC, ch).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wD = 0; wD < ${l};
wD += ${p}) {
float dyD = float(dyDCorner + wD) / ${s}.0;
if (dyD < 0.0 || dyD >= ${e.outDepth}.0 || fract(dyD) > 0.0) {
continue;
}
int idyD = int(dyD);
for (int wR = 0; wR < ${m};
wR += ${u}) {
float dyR = float(dyRCorner + wR) / ${a}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 ||
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
2022-11-20 22:20:02 +01:00
for (int wC = 0; wC < ${d};
2022-11-18 17:13:29 +01:00
wC += ${c}) {
float dyC = float(dyCCorner + wC) / ${i}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(batch, idyD, idyR, idyC, ch);
dotProd += dyValue * avgMultiplier;
}
}
}
setOutput(dotProd);
}
2023-01-06 19:23:06 +01:00
`}};function FY(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s}=e,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=[1,1,1],m=S.computePool3DInfo(a.shape,i,p,l,u,c),d=new ph(m);return t.runWebGLProgram(d,[n],a.dtype)}var IA={kernelName:vm,backendName:"webgl",kernelFunc:FY};function OY(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s}=e,a=s;us([n,s],"avgPoolGrad");let{filterSize:i,strides:p,pad:u}=o,c=S.computePool2DInfo(a.shape,i,p,1,u),l=new uh(c);return t.runWebGLProgram(l,[n],a.dtype)}var vA={kernelName:mp,backendName:"webgl",kernelFunc:OY};function PY(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s}=e,{transposeA:a,transposeB:i}=o;return zu({a:n,b:s,transposeA:a,transposeB:i,backend:t})}var kA={kernelName:zo,backendName:"webgl",kernelFunc:PY};var ch=class{constructor(e,t,o,n,s,a){this.outputShape=[],this.variableNames=["x","mean","variance"],S.assertAndGetBroadcastShape(e,t),S.assertAndGetBroadcastShape(e,o);let i="0.0";n!=null&&(S.assertAndGetBroadcastShape(e,n),this.variableNames.push("offset"),i="getOffsetAtOutCoords()");let p="1.0";s!=null&&(S.assertAndGetBroadcastShape(e,s),this.variableNames.push("scale"),p="getScaleAtOutCoords()"),this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
float x = getXAtOutCoords();
float mean = getMeanAtOutCoords();
float variance = getVarianceAtOutCoords();
float offset = ${i};
float scale = ${p};
float inv = scale * inversesqrt(variance + float(${a}));
setOutput(dot(vec3(x, -mean, offset), vec3(inv, inv, 1)));
}
2022-11-20 22:20:02 +01:00
`}};var lh=class{constructor(e,t,o,n,s,a){this.packedInputs=!0,this.packedOutput=!0,this.variableNames=["x","mean","variance"],S.assertAndGetBroadcastShape(e,t),S.assertAndGetBroadcastShape(e,o);let i="vec4(0.0)";n!=null&&(S.assertAndGetBroadcastShape(e,n),this.variableNames.push("offset"),i="getOffsetAtOutCoords()");let p="vec4(1.0)";s!=null&&(S.assertAndGetBroadcastShape(e,s),this.variableNames.push("scale"),p="getScaleAtOutCoords()"),this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
vec4 offset = ${i};
vec4 scale = ${p};
vec4 x = getXAtOutCoords();
vec4 mean = getMeanAtOutCoords();
vec4 variance = getVarianceAtOutCoords();
vec4 inv = scale * inversesqrt(variance + vec4(${a}));
setOutput((x - mean) * inv + offset);
}
2023-01-06 19:23:06 +01:00
`}};var MY=({inputs:r,backend:e,attrs:t})=>{let{x:o,mean:n,variance:s,offset:a,scale:i}=r;y.assert(n.shape.length===s.shape.length,()=>"Batch normalization gradient requires mean and variance to have equal ranks."),y.assert(a==null||n.shape.length===a.shape.length,()=>"Batch normalization gradient requires mean and offset to have equal ranks."),y.assert(i==null||n.shape.length===i.shape.length,()=>"Batch normalization gradient requires mean and scale to have equal ranks.");let{varianceEpsilon:p}=t;p==null&&(p=.001);let u=[o,n,s],c=null;a!=null&&(c=a.shape,u.push(a));let l=null;i!=null&&(l=i.shape,u.push(i));let m=O().getBool("WEBGL_PACK_NORMALIZATION")?new lh(o.shape,n.shape,s.shape,c,l,p):new ch(o.shape,n.shape,s.shape,c,l,p);return e.runWebGLProgram(m,u,u[0].dtype)},NA={kernelName:sn,backendName:"webgl",kernelFunc:MY};var mh=class{constructor(e){this.variableNames=["source"],this.outputShape=e,this.rank=e.length;let t=$e(this.rank);this.customUniforms=[{name:"start",arrayIndex:this.rank,type:"int"}];let o=LY(this.rank),n,s=e.map((a,i)=>`sourceLoc.${Lw[i]} = start[${i}] + coords.${Lw[i]};`);n=`
2022-11-18 17:13:29 +01:00
${t} sourceLoc;
${t} coords = getOutputCoords();
${s.join(`
`)}
`,this.userCode=`
void main() {
${n}
setOutput(getSource(${o}));
}
2023-01-06 19:23:06 +01:00
`}},Lw=["x","y","z","w","u","v"];function LY(r){if(r===1)return"sourceLoc";if(r<=6)return Lw.slice(0,r).map(e=>"sourceLoc."+e).join(",");throw Error(`Slicing for rank ${r} is not yet supported`)}var dh=class{constructor(e){this.variableNames=["source"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=e,this.rank=e.length,this.customUniforms=[{name:"start",arrayIndex:this.rank,type:"int"}];let t=$e(this.rank),o=Et("coords",this.rank),n=Et("sourceLoc",this.rank),s=this.rank===1?"sourceLoc":`vec2(${n.slice(-2).join()})`,a=`getChannel(getSource(${n.join()}), ${s})`,i=`
2022-11-18 17:13:29 +01:00
result.x = ${a};
if (++${o[this.rank-1]} < ${e[this.rank-1]}) {
++${n[this.rank-1]};
result.y = ${a};
--${n[this.rank-1]};
}
`,p=this.rank===1?"":`
--${o[this.rank-1]};
if (++${o[this.rank-2]} < ${e[this.rank-2]}) {
++${n[this.rank-2]};
result.z = ${a};
if (++${o[this.rank-1]} < ${e[this.rank-1]}) {
++${n[this.rank-1]};
result.w = ${a};
}
}
`,u=this.rank<=4?`sourceLoc = coords +
${t}(${e.map((c,l)=>`start[${l}]`).join()});`:e.map((c,l)=>`${n[l]} = ${o[l]} + start[${l}];`).join(`
`);this.userCode=`
void main() {
${t} coords = getOutputCoords();
${t} sourceLoc;
${u}
vec4 result = vec4(0.);
${i}
${p}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};function BY(r,e,t,o){let n=o.texData.get(r.dataId),s=o.makeTensorInfo(t,r.dtype),a=o.texData.get(s.dataId);Object.assign(a,n),a.refCount=1,a.shape=t,a.dtype=r.dtype;let i=ut.computeFlatOffset(e,y.computeStrides(r.shape));n.slice&&(i+=n.slice.flatOffset),a.slice={flatOffset:i,origDataId:n.slice&&n.slice.origDataId||r.dataId};let p=o.dataRefCount.get(a.slice.origDataId)||1;return o.dataRefCount.set(a.slice.origDataId,p+1),s}function ls(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,size:a}=o,[i,p]=ut.parseSliceParams(n,s,a);if(ut.assertParamsValid(n,i,p),y.sizeFromShape(p)===0)return t.makeTensorInfo(p,n.dtype,[]);if(t.shouldExecuteOnCPU([n])||n.dtype==="string"){let l=t.texData.get(n.dataId),m=xE(l.values,i,p,n.shape,n.dtype);return t.makeTensorInfo(p,n.dtype,m)}let{isPacked:u}=t.texData.get(n.dataId),c=ut.isSliceContinous(n.shape,i,p);if(u||!c){let l=O().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new dh(p):new mh(p),m=[i];return t.runWebGLProgram(l,[n],n.dtype,m)}return t.uploadToGPU(n.dataId),BY(n,i,p,t)}var TA={kernelName:Es,backendName:"webgl",kernelFunc:ls};var VY=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockShape:s,crops:a}=o;y.assert(n.shape.length<=4,()=>"batchToSpaceND for rank > 4 with a WebGL backend not implemented yet");let i=s.reduce((b,C)=>b*C),p=S.getReshaped(n.shape,s,i),u=S.getPermuted(p.length,s.length),c=S.getReshapedPermuted(n.shape,s,i),l=S.getSliceBeginCoords(a,s.length),m=S.getSliceSize(c,a,s.length),d=[],f=re({inputs:{x:n},backend:t,attrs:{shape:p}}),h=yt({inputs:{x:f},backend:t,attrs:{perm:u}}),g=re({inputs:{x:h},backend:t,attrs:{shape:c}}),x=ls({inputs:{x:g},backend:t,attrs:{begin:l,size:m}});return d.push(f),d.push(h),d.push(g),d.forEach(b=>t.disposeIntermediateTensorInfo(b)),x},_A={kernelName:bs,backendName:"webgl",kernelFunc:VY};function zY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,weights:s}=e,{size:a}=o,i=t.readSync(n.dataId),p=t.readSync(s.dataId),u=Kf(i,p,s.dtype,s.shape,a);return t.makeTensorInfo([a],s.dtype,u)}var 
$A={kernelName:ti,backendName:"webgl",kernelFunc:zY};function WY(r){let{inputs:e,backend:t}=r,{s0:o,s1:n}=e,s=t.readSync(o.dataId),a=t.readSync(n.dataId),i=S.assertAndGetBroadcastShape(Array.from(s),Array.from(a));return t.makeTensorInfo([i.length],"int32",Int32Array.from(i))}var EA={kernelName:fp,backendName:"webgl",kernelFunc:WY};var UY="return float(a != b);",Bw=tt({opSnippet:UY,cpuKernelImpl:uE,dtype:"bool"}),AA={kernelName:Nn,backendName:"webgl",kernelFunc:Bw};function Ka(r){let{inputs:e,backend:t}=r,{input:o}=e,n=t.texData.get(o.dataId);return At({inputs:{x:n.complexTensorInfos.real},backend:t})}var RA={kernelName:di,backendName:"webgl",kernelFunc:Ka};var GY="return float(int(x));";function DA(r,e){let t=new Jt(r.shape,GY),o=e.runWebGLProgram(t,[r],"int32");return{dataId:o.dataId,shape:o.shape,dtype:o.dtype}}function Vw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{dtype:s}=o;if(s==="complex64"){if(n.dtype==="complex64")return At({inputs:{x:n},backend:t});let a=Br(n.shape),i=Vw({inputs:{x:n},backend:t,attrs:{dtype:"float32"}}),p=Ar({inputs:{real:i,imag:a},backend:t});return a.dispose(),t.disposeIntermediateTensorInfo(i),p}if(n.dtype==="complex64"){let a=Ka({inputs:{input:n},backend:t}),i=Vw({inputs:{x:a},backend:t,attrs:{dtype:s}});return t.disposeIntermediateTensorInfo(a),i}if(!y.hasEncodingLoss(n.dtype,s)){let a=At({inputs:{x:n},backend:t});return{dataId:a.dataId,shape:a.shape,dtype:s}}if(t.shouldExecuteOnCPU([n])){let a=t.texData.get(n.dataId).values,[i,p,u]=W$(a,n.shape,n.dtype,s);return t.makeTensorInfo(i,p,u)}if(s==="int32")return DA(n,t);if(s==="bool"){let a=t.makeTensorInfo([],"bool",y.getTypedArrayFromDType("bool",1)),p=Bw({inputs:{a:n,b:a},backend:t});return t.disposeIntermediateTensorInfo(a),p}throw new Error(`Error in Cast: failed to cast ${n.dtype} to ${s}`)}var FA={kernelName:co,backendName:"webgl",kernelFunc:Vw};var OA="return 
ceil(x);",HY=he({opSnippet:OA,packedOpSnippet:OA,cpuKernelImpl:U$}),PA={kernelName:Wo,backendName:"webgl",kernelFunc:HY};var fh=class{constructor(e){this.variableNames=["A"],this.customUniforms=[{name:"minVal",type
2022-11-18 17:13:29 +01:00
void main() {
float value = getAAtOutCoords();
if (isnan(value)) {
setOutput(value);
return;
}
setOutput(clamp(value, minVal, maxVal));
}
2022-11-20 22:20:02 +01:00
`}};var hh=class{constructor(e){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"minVal",type:"float"},{name:"maxVal",type:"float"}],this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
vec4 value = getAAtOutCoords();
if (any(isnan(value))) {
setOutput(value);
return;
}
setOutput(clamp(value, vec4(minVal), vec4(maxVal)));
}
2023-01-06 19:23:06 +01:00
`}};function KY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{clipValueMin:s,clipValueMax:a}=o,i;O().getBool("WEBGL_PACK_CLIP")?i=new hh(n.shape):i=new fh(n.shape);let p=[[s],[a]];return t.runWebGLProgram(i,[n],n.dtype,p)}var MA={kernelName:lo,backendName:"webgl",kernelFunc:KY};var gh=class{constructor(e){this.variableNames=["real","imag"],this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
float re = abs(getRealAtOutCoords());
float im = abs(getImagAtOutCoords());
float mx = max(re, im);
// sadly the length function in glsl is not underflow-safe
// (at least not on Intel GPUs). So the safe solution is
// to ensure underflow-safety in all cases.
setOutput(
mx == 0.0 ? 0.0 : mx * length(vec2(1, min(re, im)/mx))
);
}
2023-01-06 19:23:06 +01:00
`}};function LA(r,e){return{dataId:e.dataId,dtype:e.dtype,shape:r.shape}}function qY(r){let{inputs:e,backend:t}=r,{x:o}=e,n=t.texData.get(o.dataId),s=new gh(o.shape),a=[LA(o,n.complexTensorInfos.real),LA(o,n.complexTensorInfos.imag)];return t.runWebGLProgram(s,a,a[0].dtype)}var BA={kernelName:hp,backendName:"webgl",kernelFunc:qY};var xh=class{constructor(e){this.outputShape=[],this.outputShape=S.computeOutShape(e,1),this.variableNames=e.map((a,i)=>`T${i}`);let t=new Array(e.length-1);t[0]=e[0][1];for(let a=1;a<t.length;a++)t[a]=t[a-1]+e[a][1];let o=[`if (yC < ${t[0]}) setOutput(getT0(yR, yC));`];for(let a=1;a<t.length;a++){let i=t[a-1];o.push(`else if (yC < ${t[a]}) setOutput(getT${a}(yR, yC-${i}));`)}let n=t.length,s=t[t.length-1];o.push(`else setOutput(getT${n}(yR, yC-${s}));`),this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
int yR = coords.x;
int yC = coords.y;
${o.join(`
`)}
}
2023-01-06 19:23:06 +01:00
`}};var bh=class{constructor(e,t){this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[],this.outputShape=S.computeOutShape(e,t);let o=this.outputShape,n=o.length,s=$e(n),a=Et("coords",n),i=["x","y","z","w","u","v"].slice(0,n);this.variableNames=e.map((h,g)=>`T${g}`);let p=new Array(e.length-1);p[0]=e[0][t];for(let h=1;h<p.length;h++)p[h]=p[h-1]+e[h][t];let u=i[t],c=i.slice(-2),l=i.join(),m=`if (${u} < ${p[0]}) {
2022-11-18 17:13:29 +01:00
return getChannel(
getT0(${l}), vec2(${c.join()}));
}`;for(let h=1;h<p.length;h++){let g=p[h-1];m+=`
if (${u} < ${p[h]} && ${u} >= ${p[h-1]}) {
return getChannel(
2022-11-20 22:20:02 +01:00
getT${h}(${yh(i,u,g)}),
vec2(${yh(c,u,g)}));
}`}let d=p.length,f=p[p.length-1];m+=`
2022-11-18 17:13:29 +01:00
return getChannel(
2022-11-20 22:20:02 +01:00
getT${d}(${yh(i,u,f)}),
vec2(${yh(c,u,f)}));`,this.userCode=`
2022-11-18 17:13:29 +01:00
float getValue(${i.map(h=>"int "+h)}) {
${m}
}
void main() {
${s} coords = getOutputCoords();
vec4 result = vec4(getValue(${a}), 0., 0., 0.);
${a[n-1]} = ${a[n-1]} + 1;
if (${a[n-1]} < ${o[n-1]}) {
result.g = getValue(${a});
}
${a[n-2]} = ${a[n-2]} + 1;
if (${a[n-2]} < ${o[n-2]}) {
result.a = getValue(${a});
}
${a[n-1]} = ${a[n-1]} - 1;
if (${a[n-2]} < ${o[n-2]} &&
${a[n-1]} < ${o[n-1]}) {
result.b = getValue(${a});
}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};function yh(r,e,t){let o=r.indexOf(e);return r.map((s,a)=>a===o?`${s} - ${t}`:s).join()}function Wu(r){let{inputs:e,backend:t}=r,{input:o}=e,n=t.texData.get(o.dataId);return At({inputs:{x:n.complexTensorInfos.imag},backend:t})}var VA={kernelName:ci,backendName:"webgl",kernelFunc:Wu};function hc(r,e,t){let o=r[0].dtype;if(o==="complex64"){let d=r.map(b=>Ka({inputs:{input:b},backend:t})),f=r.map(b=>Wu({inputs:{input:b},backend:t})),h=hc(d,e,t),g=hc(f,e,t),x=Ar({inputs:{real:h,imag:g},backend:t});return d.forEach(b=>t.disposeIntermediateTensorInfo(b)),f.forEach(b=>t.disposeIntermediateTensorInfo(b)),t.disposeIntermediateTensorInfo(h),t.disposeIntermediateTensorInfo(g),x}let n=t.shouldExecuteOnCPU(r);if(o==="string"&&(n=!0),n){let d=r.map(w=>{let _=[-1,y.sizeFromShape(w.shape.slice(e))];return re({inputs:{x:w},backend:t,attrs:{shape:_}})}),f=d.map(w=>({vals:t.readSync(w.dataId),shape:w.shape})),h=S.computeOutShape(d.map(w=>w.shape),1),g=d[0].shape[0]===1,x=G$(f,h,o,g),b=S.computeOutShape(r.map(w=>w.shape),e),C=t.makeTensorInfo(b,o,x);return d.forEach(w=>t.disposeIntermediateTensorInfo(w)),C}let s=r.filter(d=>y.sizeFromShape(d.shape)>0),a=O().getBool("WEBGL_PACK_ARRAY_OPERATIONS")&&s[0].shape.length>1;if(s.length===1){let d=a?new Jt(r[0].shape,Zs):new Er(r[0].shape,Zs);return t.runWebGLProgram(d,r,o)}let i=O().getNumber("WEBGL_MAX_TEXTURES_IN_SHADER");if(s.length>i){let d=[];for(let h=0;h<s.length;h+=i){let g=s.slice(h,h+i);d.push(hc(g,e,t))}let f=hc(d,e,t);for(let h of d)t.disposeIntermediateTensorInfo(h);return f}if(a){let d=new bh(s.map(f=>f.shape),e);return t.runWebGLProgram(d,s,o)}let{tensors2D:p,outShape:u}=jY(s,e,t),c=new xh(p.map(d=>d.shape)),l=t.runWebGLProgram(c,p,o);p.forEach(d=>t.disposeIntermediateTensorInfo(d));let m=re({inputs:{x:l},attrs:{shape:u},backend:t});return t.disposeIntermediateTensorInfo(l),m}function jY(r,e,t){let 
o=S.computeOutShape(r.map(s=>s.shape),e);return{tensors2D:r.map(s=>re({inputs:{x:s},attrs:{shape:[-1,y.sizeFromShape(s.shape.slice(e))]},backend:t})),outShape:o}}function zw(r){let{inputs:e,backend:t,attrs:o}=r,{axis:n}=o,s=y.parseAxisParam(n,e[0].shape)[0],a=e.map(u=>u.shape);S.assertParamsConsistent(a,s);let i=S.computeOutShape(e.map(u=>u.shape),s);if(y.sizeFromShape(i)===0)return t.makeTensorInfo(i,e[0].dtype,[]);let p=e.filter(u=>y.sizeFromShape(u.shape)>0);return p.length===1?At({inputs:{x:p[0]},backend:t}):hc(p,s,t)}var zA={kernelName:Cs,backendName:"webgl",kernelFunc:zw};var gc=class{constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.outputShape=e.outShape;let a=e.padInfo.top,i=e.padInfo.left,p=e.strideHeight,u=e.strideWidth,c=e.dilationHeight,l=e.dilationWidth,m=e.filterHeight,d=e.filterWidth,f=Math.floor(e.inChannels/4)*4,h=e.inChannels%4,g=e.dataFormat==="channelsLast",x=g?1:2,b=g?2:3,C=g?3:1,w="",k="";o&&(n?w=`float activation(float a) {
2022-11-18 17:13:29 +01:00
float b = getPreluActivationWeightsAtOutCoords();
${o}
}`:s?w=`float activation(float a) {
float b = getLeakyreluAlphaAtOutCoords();
${o}
}`:w=`
float activation(float x) {
${o}
}
`,k="result = activation(result);");let _=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
${w}
const ivec2 strides = ivec2(${p}, ${u});
const ivec2 pads = ivec2(${a}, ${i});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d2 = coords[${C}];
ivec2 xRCCorner =
2022-11-20 22:20:02 +01:00
ivec2(coords[${x}], coords[${b}]) * strides - pads;
2022-11-18 17:13:29 +01:00
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${m}; wR++) {
int xR = xRCorner + wR * ${c};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
2022-11-20 22:20:02 +01:00
for (int wC = 0; wC < ${d}; wC++) {
2022-11-18 17:13:29 +01:00
int xC = xCCorner + wC * ${l};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
2022-11-20 22:20:02 +01:00
for (int d1 = 0; d1 < ${f}; d1 += 4) {
2022-11-18 17:13:29 +01:00
vec4 wValues = vec4(
getW(wR, wC, d1, d2),
getW(wR, wC, d1 + 1, d2),
getW(wR, wC, d1 + 2, d2),
getW(wR, wC, d1 + 3, d2)
);
if (${g}) {
vec4 xValues = vec4(
getX(batch, xR, xC, d1),
getX(batch, xR, xC, d1 + 1),
getX(batch, xR, xC, d1 + 2),
getX(batch, xR, xC, d1 + 3)
);
dotProd += dot(xValues, wValues);
} else {
vec4 xValues = vec4(
getX(batch, d1, xR, xC),
getX(batch, d1 + 1, xR, xC),
getX(batch, d1 + 2, xR, xC),
getX(batch, d1 + 3, xR, xC)
);
dotProd += dot(xValues, wValues);
}
}
if (${h===1}) {
if (${g}) {
dotProd +=
2022-11-20 22:20:02 +01:00
getX(batch, xR, xC, ${f}) *
getW(wR, wC, ${f}, d2);
2022-11-18 17:13:29 +01:00
} else {
dotProd +=
2022-11-20 22:20:02 +01:00
getX(batch, ${f}, xR, xC) *
getW(wR, wC, ${f}, d2);
2022-11-18 17:13:29 +01:00
}
} else if (${h===2}) {
vec2 wValues = vec2(
2022-11-20 22:20:02 +01:00
getW(wR, wC, ${f}, d2),
getW(wR, wC, ${f} + 1, d2)
2022-11-18 17:13:29 +01:00
);
if (${g}) {
vec2 xValues = vec2(
2022-11-20 22:20:02 +01:00
getX(batch, xR, xC, ${f}),
getX(batch, xR, xC, ${f} + 1)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
} else {
vec2 xValues = vec2(
2022-11-20 22:20:02 +01:00
getX(batch, ${f}, xR, xC),
getX(batch, ${f} + 1, xR, xC)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
}
} else if (${h===3}) {
vec3 wValues = vec3(
2022-11-20 22:20:02 +01:00
getW(wR, wC, ${f}, d2),
getW(wR, wC, ${f} + 1, d2),
getW(wR, wC, ${f} + 2, d2)
2022-11-18 17:13:29 +01:00
);
if (${g}) {
vec3 xValues = vec3(
2022-11-20 22:20:02 +01:00
getX(batch, xR, xC, ${f}),
getX(batch, xR, xC, ${f} + 1),
getX(batch, xR, xC, ${f} + 2)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
} else {
vec3 xValues = vec3(
2022-11-20 22:20:02 +01:00
getX(batch, ${f}, xR, xC),
getX(batch, ${f} + 1, xR, xC),
getX(batch, ${f} + 2, xR, xC)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
}
}
}
}
float result = dotProd;
${_}
${k}
setOutput(result);
}
2022-11-20 22:20:02 +01:00
`}},Ch=class{constructor(e){this.variableNames=["x","W"],this.outputShape=e.outShape;let t=e.padInfo.front,o=e.padInfo.top,n=e.padInfo.left,s=e.strideDepth,a=e.strideHeight,i=e.strideWidth,p=e.dilationDepth,u=e.dilationHeight,c=e.dilationWidth,l=e.filterDepth,m=e.filterHeight,d=e.filterWidth,f=Math.floor(e.inChannels/4)*4,h=e.inChannels%4;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec3 strides = ivec3(${s}, ${a}, ${i});
const ivec3 pads = ivec3(${t}, ${o}, ${n});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int d2 = coords.u;
ivec3 xFRCCorner = ivec3(coords.y, coords.z, coords.w) * strides - pads;
int xFCorner = xFRCCorner.x;
int xRCorner = xFRCCorner.y;
int xCCorner = xFRCCorner.z;
// Convolve x(?, ?, ?, d1) with w(:, :, :, d1, d2) to get
// y(yF, yR, yC, d2). ? = to be determined. : = across all
// values in that axis.
float dotProd = 0.0;
for (int wF = 0; wF < ${l}; wF++) {
int xF = xFCorner + wF * ${p};
if (xF < 0 || xF >= ${e.inDepth}) {
continue;
}
for (int wR = 0; wR < ${m}; wR++) {
int xR = xRCorner + wR * ${u};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
2022-11-20 22:20:02 +01:00
for (int wC = 0; wC < ${d}; wC++) {
2022-11-18 17:13:29 +01:00
int xC = xCCorner + wC * ${c};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
2022-11-20 22:20:02 +01:00
for (int d1 = 0; d1 < ${f}; d1 += 4) {
2022-11-18 17:13:29 +01:00
vec4 xValues = vec4(
getX(batch, xF, xR, xC, d1),
getX(batch, xF, xR, xC, d1 + 1),
getX(batch, xF, xR, xC, d1 + 2),
getX(batch, xF, xR, xC, d1 + 3)
);
vec4 wValues = vec4(
getW(wF, wR, wC, d1, d2),
getW(wF, wR, wC, d1 + 1, d2),
getW(wF, wR, wC, d1 + 2, d2),
getW(wF, wR, wC, d1 + 3, d2)
);
dotProd += dot(xValues, wValues);
}
if (${h===1}) {
dotProd +=
2022-11-20 22:20:02 +01:00
getX(batch, xF, xR, xC, ${f}) *
getW(wF, wR, wC, ${f}, d2);
2022-11-18 17:13:29 +01:00
} else if (${h===2}) {
vec2 xValues = vec2(
2022-11-20 22:20:02 +01:00
getX(batch, xF, xR, xC, ${f}),
getX(batch, xF, xR, xC, ${f} + 1)
2022-11-18 17:13:29 +01:00
);
vec2 wValues = vec2(
2022-11-20 22:20:02 +01:00
getW(wF, wR, wC, ${f}, d2),
getW(wF, wR, wC, ${f} + 1, d2)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
} else if (${h===3}) {
vec3 xValues = vec3(
2022-11-20 22:20:02 +01:00
getX(batch, xF, xR, xC, ${f}),
getX(batch, xF, xR, xC, ${f} + 1),
getX(batch, xF, xR, xC, ${f} + 2)
2022-11-18 17:13:29 +01:00
);
vec3 wValues = vec3(
2022-11-20 22:20:02 +01:00
getW(wF, wR, wC, ${f}, d2),
getW(wF, wR, wC, ${f} + 1, d2),
getW(wF, wR, wC, ${f} + 2, d2)
2022-11-18 17:13:29 +01:00
);
dotProd += dot(xValues, wValues);
}
}
}
}
setOutput(dotProd);
}
2023-01-06 19:23:06 +01:00
`}};var xc=class{constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=e.outShape,this.enableShapeUniforms=lt(this.outputShape.length);let a=e.padInfo.left,i=e.strideWidth,p=e.dilationWidth,u=e.filterHeight,c=e.filterWidth,l=c,m=`
2022-11-18 17:13:29 +01:00
int xR; int xC; int xCOffset;
vec4 wTexel; vec4 previous; vec4 final;`;for(let g=0;g<c;g++)m+=`
vec4 xTexelC${g*2};
int xTexelC${g*2}Ready;
vec4 xTexelC${g*2+1};
int xTexelC${g*2+1}Ready;
vec4 xC${g};`;m+=`
for (int r = 0; r < ${u}; r++) {
for (int d1 = 0; d1 < ${e.inChannels}; d1 += 2) {
`;for(let g=0;g<c;g++)m+=`
xTexelC${g*2} = vec4(0.0);
xTexelC${g*2}Ready = 0;
xTexelC${g*2+1} = vec4(0.0);
xTexelC${g*2+1}Ready = 0;
xC${g} = vec4(0.0);`;m+=`
xR = xRCorner + r * dilations[0];
if (xR >=0 && xR < inDims[0]) {
2022-11-20 22:20:02 +01:00
`;for(let g=0;g<(l+1)/2;g++){let x=g*2;if(m+=`
xC = xCCorner + ${x*p};
`,i===1){if(x<c&&(a%2===1?(m+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1;
2022-11-20 22:20:02 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`,p===1&&x>0?m+=`
xC${x} = vec4(xTexelC${x-2}.zw, xTexelC${x}.xy);
2022-11-18 17:13:29 +01:00
`:m+=`
xCOffset = xC + 1 - 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
previous.zw = vec2(0.0);
}
2022-11-20 22:20:02 +01:00
xC${x} = vec4(previous.zw, xTexelC${x}.xy);
2022-11-18 17:13:29 +01:00
} else {
2022-11-20 22:20:02 +01:00
xC${x} = vec4(0.0, 0.0, xTexelC${x}.xy);
2022-11-18 17:13:29 +01:00
}
`):m+=`
2022-11-20 22:20:02 +01:00
if (xC >= 0 && xC < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xC, d1);
2022-11-18 17:13:29 +01:00
if (xC + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x} = xTexelC${x};
`,x+1<c)){let b=a%2===0?y.nearestLargerEven(p):p;p%2===0&&a%2===1||p%2!==0&&a%2!==1?(m+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + imod(pads[1], 2) + ${b};
2022-11-20 22:20:02 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
`,p>1?m+=`
xCOffset -= 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(previous.zw, xTexelC${x+1}.xy);
2022-11-18 17:13:29 +01:00
} else {
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(0.0, 0.0, xTexelC${x+1}.xy);
2022-11-18 17:13:29 +01:00
}
`:m+=`
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(xTexelC${x}.zw, xTexelC${x+1}.xy);
2022-11-18 17:13:29 +01:00
`):b===1?m+=`
2022-11-20 22:20:02 +01:00
xC${x+1} = xTexelC${x};
2022-11-18 17:13:29 +01:00
`:m+=`
xCOffset = xC + ${b};
2022-11-20 22:20:02 +01:00
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x+1} = xTexelC${x+1};
`}}else x<c&&(a%2===1?(m+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1 - strides[1];
2022-11-20 22:20:02 +01:00
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xC + 1, d1);
2022-11-18 17:13:29 +01:00
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xC + 2 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x} = vec4(xTexelC${x}.zw, xTexelC${x+1}.zw);
`,x+1<c&&(m+=`
2022-11-18 17:13:29 +01:00
final = vec4(0.0);
xCOffset = xC + 1 + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1]) {
final = getX(batch, xR, xCOffset, d1);
}
2022-11-20 22:20:02 +01:00
xC${x+1} = vec4(xTexelC${x+1}.xy, final.xy);
2022-11-18 17:13:29 +01:00
`)):(m+=`
2022-11-20 22:20:02 +01:00
if(xC >= 0 && xC < inDims[1] && xTexelC${x}Ready == 0) {
xTexelC${x} = getX(batch, xR, xC, d1);
2022-11-18 17:13:29 +01:00
if (xC + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x}.zw = vec2(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x}Ready = 1;
2022-11-18 17:13:29 +01:00
}
xCOffset = xC + strides[1];
2022-11-20 22:20:02 +01:00
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${x+1}Ready == 0) {
xTexelC${x+1} = getX(batch, xR, xCOffset, d1);
2022-11-18 17:13:29 +01:00
if (xCOffset + 1 >= inDims[1]) {
2022-11-20 22:20:02 +01:00
xTexelC${x+1}.zw = vec2(0.);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xTexelC${x+1}Ready = 1;
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
xC${x} = vec4(
xTexelC${x}.xy, xTexelC${x+1}.xy);
`,x+1<c&&(m+=`
xC${x+1} = vec4(xTexelC${x}.zw, xTexelC${x+1}.zw);
`)));x<c&&(m+=`
wTexel = getW(r, ${x}, d1, d2);
dotProd += xC${x}.xxzz * vec4(wTexel.xy, wTexel.xy);
2022-11-18 17:13:29 +01:00
if(d1 + 1 < ${e.inChannels}) {
2022-11-20 22:20:02 +01:00
dotProd += xC${x}.yyww * vec4(wTexel.zw, wTexel.zw);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`,x+1<c&&(m+=`
wTexel = getW(r, ${x+1}, d1, d2);
dotProd += xC${x+1}.xxzz * vec4(wTexel.xy, wTexel.xy);
2022-11-18 17:13:29 +01:00
if(d1 + 1 < ${e.inChannels}) {
2022-11-20 22:20:02 +01:00
dotProd += xC${x+1}.yyww * vec4(wTexel.zw, wTexel.zw);
2022-11-18 17:13:29 +01:00
}
`))}m+=`
}
`,m+=`
}
`,m+=`
}
2022-11-20 22:20:02 +01:00
`;let d="",f="";o&&(n?d=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getPreluActivationWeightsAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:s?d=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getLeakyreluAlphaAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:d=`vec4 activation(vec4 x) {
2022-11-18 17:13:29 +01:00
${o}
2022-11-20 22:20:02 +01:00
}`,f="result = activation(result);");let h=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
${d}
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
//intialize dotProd with a small epsilon seems to reduce GPU accuracy loss.
vec4 dotProd = vec4(0.000000000000001);
${m}
vec4 result = dotProd - vec4(0.000000000000001);
${h}
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};var Sh=class{constructor(e,t){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"inputShape",type:"ivec4"},{name:"pad",type:"ivec2"},{name:"stride",type:"ivec2"},{name:"dilation",type:"ivec2"},{name:"inChannels",type:"int"},{name:"itemsPerBlockRow",type:"int"},{name:"outWidth",type:"int"}],this.outputShape=e,this.enableShapeUniforms=lt(this.outputShape.length);let{dataFormat:o}=t,n=wt(),s=o==="channelsLast",a=s?1:2,i=s?2:3,p=this.enableShapeUniforms?"if(blockIndex < outShape[2] && pos < outShape[1]) {":`if(blockIndex < ${e[2]} && pos < ${e[1]}) {`,u="";for(let c=0;c<=1;c++)for(let l=0;l<=1;l++)u+=`
2022-11-18 17:13:29 +01:00
blockIndex = rc.z + ${l};
pos = rc.y + ${c};
${p}
offsetY = int(blockIndex / outWidth) * stride[0] - pad[0];
d0 = offsetY + dilation[0] * (pos / itemsPerBlockRow);
if(d0 < inputShape[${a}] && d0 >= 0) {
// Use custom imod instead mod. On Intel GPU, mod may generate
// unexpected value.
// https://github.com/tensorflow/tfjs/issues/5447
offsetX = imod(blockIndex, outWidth) * stride[1] - pad[1];
d1 = offsetX + dilation[1] * (imod(pos, itemsPerBlockRow) /
inChannels);
if(d1 < inputShape[${i}] && d1 >= 0) {
ch = imod(pos, inChannels);
if (${s}) {
innerDims = vec2(d1, ch);
result[${c*2+l}] = getChannel(
getA(rc.x, d0, int(innerDims.x),
int(innerDims.y)), innerDims);
} else {
innerDims = vec2(d0, d1);
result[${c*2+l}] = getChannel(
getA(rc.x, ch, int(innerDims.x),
int(innerDims.y)), innerDims);
}
}
}
}
`;this.userCode=`
void main() {
ivec3 rc = getOutputCoords();
vec4 result = vec4(0);
int blockIndex, pos, offsetY, d0, offsetX, d1, ch;
vec2 innerDims;
${u}
${n.output} = result;
}
2023-01-06 19:23:06 +01:00
`}};function wh(r,e){let t=r.length;return t>=3?e?[...r.slice(0,-3),r[t-3]*r[t-2],r[t-1]]:[...r.slice(0,-3),r[t-3],r[t-2]*r[t-1]]:!e&&t===1&&r[0]>1?[r[0],1]:null}function Ih({x:r,filter:e,convInfo:t,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=r.shape,u=o.texData.get(r.dataId),c=t.inChannels,l=p[0]*p[1]*p[2],m=t.outChannels,d=t.dataFormat==="channelsLast",f=!1,h=!1,g,x=[];if(s!=null){let w=wh(s.shape,d);w!=null&&(s=re({inputs:{x:s},backend:o,attrs:{shape:w}}),x.push(s))}if(n!=null){let w=wh(n.shape,d);w!=null&&(n=re({inputs:{x:n},backend:o,attrs:{shape:w}}),x.push(n))}if(!((l===1||m===1)&&c>Mw)&&u.isPacked&&d&&u.texture!=null&&p[2]%2!==0&&y.arraysEqual(u.shape.slice(-3),p.slice(-3))){let w=p[0]*p[1]*(p[2]+1),k={dataId:r.dataId,shape:[1,w,t.inChannels],dtype:r.dtype},_=u.shape;u.shape=u.shape.slice(),u.shape[u.shape.length-2]++,y.assert(Bi(u.shape,k.shape),()=>`packed reshape ${u.shape} to ${k.shape} isn't free`);let E=re({inputs:{x:e},backend:o,attrs:{shape:[1,t.inChannels,t.outChannels]}});x.push(E);let A=zu({a:k,b:E,backend:o,transposeA:f,transposeB:h,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a}),R=o.texData.get(A.dataId);y.assert(R.isPacked,()=>"batchMatMul result is expected to be packed"),u.shape=_,R.shape=t.outShape,g=At({inputs:{x:A},backend:o}),g.shape=t.outShape,x.push(A)}else{let w=t.outHeight*t.outWidth,k=re({inputs:{x:r},backend:o,attrs:{shape:d?[t.batchSize,w,t.inChannels]:[t.batchSize,t.inChannels,w]}}),_=re({inputs:{x:e},backend:o,attrs:{shape:[1,t.inChannels,t.outChannels]}}),E=zu({a:d?k:_,b:d?_:k,transposeA:!d,transposeB:h,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a});g=re({inputs:{x:E},backend:o,attrs:{shape:t.outShape}}),x.push(k),x.push(_),x.push(E)}for(let w of x)o.disposeIntermediateTensorInfo(w);return g}function 
vh({x:r,filter:e,convInfo:t,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let{filterWidth:p,filterHeight:u,inChannels:c,outWidth:l,outHeight:m,dataFormat:d}=t,f=d==="channelsLast",h=p*u*c,g=m*l,x=[t.batchSize,h,g],b=!0,C=!1,w=[];if(s!=null){let H=wh(s.shape,f);H!=null&&(s=re({inputs:{x:s},backend:o,attrs:{shape:H}}),w.push(s))}if(n!=null){let H=wh(n.shape,f);H!=null&&(n=re({inputs:{x:n},backend:o,attrs:{shape:H}}),w.push(n))}let k=re({inputs:{x:e},backend:o,attrs:{shape:[1,h,y.sizeFromShape(e.shape)/h]}});w.push(k);let _=new Sh(x,t),E=[r.shape,[t.padInfo.top,t.padInfo.left],[t.strideHeight,t.strideWidth],[t.dilationHeight,t.dilationWidth],[t.inChannels],[t.filterWidth*t.inChannels],[t.outWidth]],A=o.runWebGLProgram(_,[r],"float32",E),R=re({inputs:{x:A},backend:o,attrs:{shape:x}});w.push(A),w.push(R);let D=n!=null,P=s!=null,M=i==="leakyrelu",L=i?Ha(i,!0):null,V=new fc(f?R.shape:k.shape,f?k.shape:R.shape,f?[t.batchSize,g,t.outChannels]:[t.batchSize,t.outChannels,g],b,C,D,L,P,M),z=f?[R,k]:[k,R];if(n&&z.push(n),P&&z.push(s),M){let H=o.makeTensorInfo([],"float32",y.createScalarValue(a,"float32"));z.push(H),w.push(H)}let U=o.runWebGLProgram(V,z,"float32"),K=re({inputs:{x:U},backend:o,attrs:{shape:t.outShape}});w.push(U);for(let H of w)o.disposeIntermediateTensorInfo(H);return K}function XY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dataFormat:p,dilations:u,dimRoundingMode:c}=o,l=S.convertConv2DDataFormat(p),m=S.computeConv2DInfo(n.shape,s.shape,a,u,i,c,!1,l),d;if(m.filterHeight===1&&m.filterWidth===1&&m.dilationHeight===1&&m.dilationWidth===1&&m.strideHeight===1&&m.strideWidth===1&&(m.padInfo.type==="SAME"||m.padInfo.type==="VALID"))d=Ih({x:n,filter:s,convInfo:m,backend:t});else if(m.strideWidth<=2&&l==="channelsLast"&&O().getBool("WEBGL_EXP_CONV")){let h=new 
xc(m),g=[[m.padInfo.top,m.padInfo.left],[m.strideHeight,m.strideWidth],[m.dilationHeight,m.dilationWidth],[m.inHeight,m.inWidth]];d=t.runWebGLProgram(h,[n,s],"float32",g)}else if(O().getBool("WEBGL_CONV_IM2COL"))d=vh({x:n,filter:s,convInfo:m,backend:t});else{let h=new gc(m);d=t.runWebGLProgram(h,[n,s],"float32")}let f=re({inputs:{x:d},backend:t,attrs:{shape:m.outShape
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int wR = coords.x;
int wC = coords.y;
int d1 = coords.z;
int d2 = coords.w;
// Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int b = 0; b < ${e.batchSize}; b++) {
for (int yR = 0; yR < ${e.outHeight}; yR++) {
int xR = wR + yR * ${t} - ${n};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int yC = 0; yC < ${e.outWidth}; yC++) {
int xC = wC + yC * ${o} - ${s};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
if (${a}) {
float dyValue = getDy(b, yR, yC, d2);
float xValue = getX(b, xR, xC, d1);
dotProd += (xValue * dyValue);
} else {
float dyValue = getDy(b, d2, yR, yC);
float xValue = getX(b, d1, xR, xC);
dotProd += (xValue * dyValue);
}
}
}
}
setOutput(dotProd);
}
2022-11-20 22:20:02 +01:00
`}},Nh=class{constructor(e){this.variableNames=["dy","W"],this.outputShape=e.inShape;let t=e.filterHeight,o=e.filterWidth,n=e.strideHeight,s=e.strideWidth,a=e.dataFormat==="channelsLast",i=t-1-e.padInfo.top,p=o-1-e.padInfo.left,u=a?1:2,c=a?2:3,l=a?3:1;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${i}, ${p});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d1 = coords[${l}];
ivec2 dyCorner = ivec2(coords[${u}], coords[${c}]) - pads;
int dyRCorner = dyCorner.x;
int dyCCorner = dyCorner.y;
// Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${t}; wR++) {
float dyR = float(dyRCorner + wR) / ${n}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
int wRPerm = ${t} - 1 - wR;
for (int wC = 0; wC < ${o}; wC++) {
float dyC = float(dyCCorner + wC) / ${s}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${o} - 1 - wC;
for (int d2 = 0; d2 < ${e.outChannels}; d2++) {
if (${a}) {
float xValue = getDy(batch, idyR, idyC, d2);
float wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
} else {
float xValue = getDy(batch, d2, idyR, idyC);
float wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
}
}
}
}
setOutput(dotProd);
}
2022-11-20 22:20:02 +01:00
`}},Th=class{constructor(e){this.variableNames=["x","dy"],this.outputShape=e.filterShape;let t=e.strideDepth,o=e.strideHeight,n=e.strideWidth,s=e.padInfo.front,a=e.padInfo.top,i=e.padInfo.left;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int wF = coords.x;
int wR = coords.y;
int wC = coords.z;
int d1 = coords.w;
int d2 = coords.u;
float dotProd = 0.0;
for (int b = 0; b < ${e.batchSize}; b++) {
for (int yF = 0; yF < ${e.outDepth}; yF++) {
int xF = wF + yF * ${t} - ${s};
if (xF < 0 || xF >= ${e.inDepth}) {
continue;
}
for (int yR = 0; yR < ${e.outHeight}; yR++) {
int xR = wR + yR * ${o} - ${a};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int yC = 0; yC < ${e.outWidth}; yC++) {
int xC = wC + yC * ${n} - ${i};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float dyValue = getDy(b, yF, yR, yC, d2);
float xValue = getX(b, xF, xR, xC, d1);
dotProd += (xValue * dyValue);
}
}
}
}
setOutput(dotProd);
}
2022-11-20 22:20:02 +01:00
`}},_h=class{constructor(e){this.variableNames=["dy","W"],this.outputShape=e.inShape;let t=e.filterDepth,o=e.filterHeight,n=e.filterWidth,s=e.strideDepth,a=e.strideHeight,i=e.strideWidth,p=t-1-e.padInfo.front,u=o-1-e.padInfo.top,c=n-1-e.padInfo.left;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec3 pads = ivec3(${p}, ${u}, ${c});
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int d1 = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyFCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
float dotProd = 0.0;
for (int wF = 0; wF < ${t}; wF++) {
float dyF = float(dyFCorner + wF) / ${s}.0;
if (dyF < 0.0 || dyF >= ${e.outDepth}.0 || fract(dyF) > 0.0) {
continue;
}
int idyF = int(dyF);
int wFPerm = ${t} - 1 - wF;
for (int wR = 0; wR < ${o}; wR++) {
float dyR = float(dyRCorner + wR) / ${a}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 ||
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
int wRPerm = ${o} - 1 - wR;
for (int wC = 0; wC < ${n}; wC++) {
float dyC = float(dyCCorner + wC) / ${i}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${n} - 1 - wC;
for (int d2 = 0; d2 < ${e.outChannels}; d2++) {
float xValue = getDy(batch, idyF, idyR, idyC, d2);
float wValue = getW(wFPerm, wRPerm, wCPerm, d1, d2);
dotProd += xValue * wValue;
}
}
}
}
setOutput(dotProd);
}
2023-01-06 19:23:06 +01:00
`}};function YY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,dy:s}=e,{strides:a,pad:i,dataFormat:p,dimRoundingMode:u,filterShape:c}=o,l=S.convertConv2DDataFormat(p),m=S.computeConv2DInfo(n.shape,c,a,1,i,u,!1,l),d=new kh(m);return t.runWebGLProgram(d,[n,s],"float32")}var UA={kernelName:oi,backendName:"webgl",kernelFunc:YY};function QY(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,filter:s}=e,{inputShape:a,strides:i,pad:p,dataFormat:u,dimRoundingMode:c}=o,l=S.convertConv2DDataFormat(u),m=S.computeConv2DInfo(a,s.shape,i,1,p,c,!1,l),d=new Nh(m);return t.runWebGLProgram(d,[n,s],"float32")}var GA={kernelName:Go,backendName:"webgl",kernelFunc:QY};function ZY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dilations:p}=o,u=S.computeConv3DInfo(n.shape,s.shape,a,p,i),c=new Ch(u);return t.runWebGLProgram(c,[n,s],"float32")}var HA={kernelName:gp,backendName:"webgl",kernelFunc:ZY};function JY(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,dy:s}=e,{strides:a,pad:i,filterShape:p}=o,u=S.computeConv3DInfo(n.shape,p,a,1,i),c=new Th(u);return t.runWebGLProgram(c,[n,s],"float32")}var KA={kernelName:km,backendName:"webgl",kernelFunc:JY};function eQ(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,filter:s}=e,{pad:a,strides:i,inputShape:p}=o,u=S.computeConv3DInfo(p,s.shape,i,1,a),c=new _h(u);return t.runWebGLProgram(c,[n,s],"float32")}var qA={kernelName:xp,backendName:"webgl",kernelFunc:eQ};var tQ=$o+`
2022-11-18 17:13:29 +01:00
return cos(x);
2023-01-06 19:23:06 +01:00
`,rQ=he({opSnippet:tQ}),jA={kernelName:Ho,backendName:"webgl",kernelFunc:rQ};var oQ=`
2022-11-18 17:13:29 +01:00
float e2x = exp(-x);
return (e2x + 1.0 / e2x) / 2.0;
2023-01-06 19:23:06 +01:00
`,nQ=he({opSnippet:oQ}),XA={kernelName:Ko,backendName:"webgl",kernelFunc:nQ};var $h=class{constructor(e,t,o,n,s){this.variableNames=["Image","Boxes","BoxInd"],this.outputShape=[];let[a,i,p,u]=e,[c]=t,[l,m]=o;this.outputShape=[c,l,m,u];let d=n==="bilinear"?1:0,[f,h]=[`${i-1}.0`,`${p-1}.0`],[g,x,b]=l>1?[`${(i-1)/(l-1)}`,"(y2-y1) * height_ratio",`y1*${f} + float(y)*(height_scale)`]:["0.0","0.0",`0.5 * (y1+y2) * ${f}`],[C,w,k]=m>1?[`${(p-1)/(m-1)}`,"(x2-x1) * width_ratio",`x1*${h} + float(x)*(width_scale)`]:["0.0","0.0",`0.5 * (x1+x2) * ${h}`];this.userCode=`
2022-11-18 17:13:29 +01:00
const float height_ratio = float(${g});
const float width_ratio = float(${C});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int y = coords[1];
int x = coords[2];
int d = coords[3];
// get box vals
float y1 = getBoxes(b,0);
float x1 = getBoxes(b,1);
float y2 = getBoxes(b,2);
float x2 = getBoxes(b,3);
// get image in batch index
int bInd = round(getBoxInd(b));
if(bInd < 0 || bInd >= ${a}) {
return;
}
2022-11-20 22:20:02 +01:00
float height_scale = ${x};
2022-11-18 17:13:29 +01:00
float width_scale = ${w};
float in_y = ${b};
2022-11-20 22:20:02 +01:00
if( in_y < 0.0 || in_y > ${f} ) {
2022-11-18 17:13:29 +01:00
setOutput(float(${s}));
return;
}
float in_x = ${k};
if( in_x < 0.0 || in_x > ${h} ) {
setOutput(float(${s}));
return;
}
vec2 sourceFracIndexCR = vec2(in_x,in_y);
2022-11-20 22:20:02 +01:00
if(${d} == 1) {
2022-11-18 17:13:29 +01:00
// Compute the four integer indices.
ivec2 sourceFloorCR = ivec2(sourceFracIndexCR);
ivec2 sourceCeilCR = ivec2(ceil(sourceFracIndexCR));
float topLeft = getImage(b, sourceFloorCR.y, sourceFloorCR.x, d);
float bottomLeft = getImage(b, sourceCeilCR.y, sourceFloorCR.x, d);
float topRight = getImage(b, sourceFloorCR.y, sourceCeilCR.x, d);
float bottomRight = getImage(b, sourceCeilCR.y, sourceCeilCR.x, d);
vec2 fracCR = sourceFracIndexCR - vec2(sourceFloorCR);
float top = topLeft + (topRight - topLeft) * fracCR.x;
float bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR.x;
float newValue = top + (bottom - top) * fracCR.y;
setOutput(newValue);
} else {
// Compute the coordinators of nearest neighbor point.
ivec2 sourceNearestCR = ivec2(floor(
sourceFracIndexCR + vec2(0.5,0.5)));
float newValue = getImage(b, sourceNearestCR.y, sourceNearestCR.x, d);
setOutput(newValue);
}
}
2023-01-06 19:23:06 +01:00
`}};var sQ=r=>{let{inputs:e,backend:t,attrs:o}=r,{image:n,boxes:s,boxInd:a}=e,{cropSize:i,method:p,extrapolationValue:u}=o,c=new $h(n.shape,s.shape,i,p,u);return t.runWebGLProgram(c,[n,s,a],"float32")},YA={kernelName:Xo,backendName:"webgl",kernelFunc:sQ};var Uu;(function(r){r.Prod="*",r.Sum="+"})(Uu||(Uu={}));var Rl=class{constructor(e,t,o,n){this.op=e,this.outputShape=t,this.variableNames=["x"],this.customUniforms=[{name:"index",type:"float"}];let s=this.outputShape.length,a=this.op===Uu.Prod?"1.0":"0.0",i=o?a:`getX(${QA(s,"coords",this.op)})`,p=this.outputShape[this.outputShape.length-1],u="",c="";o?(u=n?`end != ${p-1}`:"end != 0",c=n?"end + 1":"end - 1"):(u=n?`end + pow2 < ${p}`:"end >= pow2",c=n?"end + pow2":"end - pow2"),this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
2023-01-06 19:23:06 +01:00
${$e(s)} coords = getOutputCoords();
int end = ${ZA(s,"coords",this.op)};
2022-11-18 17:13:29 +01:00
float val = ${i};
int pow2 = int(pow(2.0, index));
if (${u}) {
int idx = ${c};
2023-01-06 19:23:06 +01:00
${ZA(s,"coords",this.op)} = idx;
val ${this.op}= getX(${QA(s,"coords",this.op)});
2022-11-18 17:13:29 +01:00
}
setOutput(val);
}
2023-01-06 19:23:06 +01:00
`}};function QA(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.x, ${e}.y`;if(r===3)return`${e}.x, ${e}.y, ${e}.z`;if(r===4)return`${e}.x, ${e}.y, ${e}.z, ${e}.w`;throw new Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function ZA(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.y`;if(r===3)return`${e}.z`;if(r===4)return`${e}.w`;throw new Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function Eh(r,e,t,o,n,s){let a=e.shape.length,i=S.getAxesPermutation([o],a),p=e;i!=null&&(p=yt({inputs:{x:e},backend:t,attrs:{perm:i}}));let u=S.getInnerMostAxes(1,a)[0];if(u!==a-1)throw new Error(`WebGL cumprod shader expects an inner-most axis=${e.shape.length-1} but got axis=${o}`);let c=p.shape[u],l=At({inputs:{x:p},backend:t});for(let m=0;m<=Math.ceil(Math.log2(c))-1;m++){let d=new Rl(r,p.shape,!1,s),f=[[m]],h=l;l=t.runWebGLProgram(d,[l],l.dtype,f),t.disposeIntermediateTensorInfo(h)}if(n){let m=new Rl(r,p.shape,n,s),d=l;l=t.runWebGLProgram(m,[l],l.dtype),t.disposeIntermediateTensorInfo(d)}if(i!=null){let m=S.getUndoAxesPermutation(i),d=yt({inputs:{x:l},backend:t,attrs:{perm:m}});return t.disposeIntermediateTensorInfo(l),t.disposeIntermediateTensorInfo(p),d}return l}function aQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return Eh(Uu.Prod,n,t,s,a,i)}var JA={kernelName:qo,backendName:"webgl",kernelFunc:aQ};function iQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return Eh(Uu.Sum,n,t,s,a,i)}var eR={kernelName:jo,backendName:"webgl",kernelFunc:iQ};function uQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,weights:s}=e,{size:a,binaryOutput:i}=o;if(n.shape.length===1){let p=t.readSync(n.dataId),u=t.readSync(s.dataId),c=Kf(p,u,s.dtype,s.shape,a);return t.makeTensorInfo([a],s.dtype,c)}else if(n.shape.length===2){let p=t.bufferSync(n),u=t.bufferSync(s),c=z$(p,u,a,i);return t.makeTensorInfo(c.shape,s.dtype,c.values)}throw new Error(`Error in denseBincount: input must be at most rank 2, but got 
rank${n.shape.length}.`)}var tR={kernelName:ni,backendName:"webgl",kernelFunc:uQ};var Ah=class{constructor(e,t,o){this.variableNames=["x"],this.outputShape=[],this.outputShape=e,this.blockSize=t,this.dataFormat=o,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int h = ${this.getHeightCoordString()};
int w = ${this.getWidthCoordString()};
int d = ${this.getDepthCoordString()};
int in_h = h / ${t};
int offset_h = imod(h, ${t});
int in_w = w / ${t};
int offset_w = imod(w, ${t});
int offset_d = (offset_h * ${t} + offset_w) *
${this.getOutputDepthSize()};
int in_d = d + offset_d;
float result = ${this.getInputSamplingString()};
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}getHeightCoordString(){return this.dataFormat==="NHWC"?"coords[1]":"coords[2]"}getWidthCoordString(){return this.dataFormat==="NHWC"?"coords[2]":"coords[3]"}getDepthCoordString(){return this.dataFormat==="NHWC"?"coords[3]":"coords[1]"}getOutputDepthSize(){return this.dataFormat==="NHWC"?this.outputShape[3]:this.outputShape[1]}getInputSamplingString(){return this.dataFormat==="NHWC"?"getX(b, in_h, in_w, in_d)":"getX(b, in_d, in_h, in_w)"}};function pQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockSize:s,dataFormat:a}=o,i=n.shape[0],p=a==="NHWC"?n.shape[1]:n.shape[2],u=a==="NHWC"?n.shape[2]:n.shape[3],c=a==="NHWC"?n.shape[3]:n.shape[1],l=p*s,m=u*s,d=c/(s*s),f=a==="NHWC"?[i,l,m,d]:[i,d,l,m],h=new Ah(f,s,a);return t.runWebGLProgram(h,[n],n.dtype)}var rR={kernelName:Yo,backendName:"webgl",kernelFunc:pQ};var yc=class{constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=e.outShape,this.enableShapeUniforms=lt(this.outputShape.length);let a=e.filterHeight,i=e.filterWidth,p=e.outChannels/e.inChannels,u="",c="";o&&(n?u=`float activation(float a) {
2022-11-18 17:13:29 +01:00
float b = getPreluActivationWeightsAtOutCoords();
${o}
}`:s?u=`float activation(float a) {
float b = getLeakyreluAlphaAtOutCoords();
${o}
}`:u=`
float activation(float x) {
${o}
}
`,c="result = activation(result);");let l=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
${u}
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int d1 = d2 / ${p};
int q = d2 - d1 * ${p};
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
// Convolve x(?, ?, d1) with w(:, :, d1, q) to get y(yR, yC, d2).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
// TO DO(dsmilkov): Flatten the two for loops and vec4 the operations.
for (int wR = 0; wR < ${a}; wR++) {
int xR = xRCorner + wR * dilations[0];
if (xR < 0 || xR >= inDims[0]) {
continue;
}
for (int wC = 0; wC < ${i}; wC++) {
int xC = xCCorner + wC * dilations[1];
if (xC < 0 || xC >= inDims[1]) {
continue;
}
float xVal = getX(batch, xR, xC, d1);
float wVal = getW(wR, wC, d1, q);
dotProd += xVal * wVal;
}
}
float result = dotProd;
${l}
${c}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};var bc=class{constructor(e,t=!1,o=null,n=!1,s=!1){this.variableNames=["x","W"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"pads",type:"ivec2"},{name:"strides",type:"ivec2"},{name:"dilations",type:"ivec2"},{name:"inDims",type:"ivec2"}],this.outputShape=e.outShape,this.enableShapeUniforms=lt(this.outputShape.length);let a=e.outChannels/e.inChannels,i=e.padInfo.left,p=e.strideWidth,u=e.dilationWidth,c=e.filterHeight,l=e.filterWidth,m=l,d=`
2022-11-18 17:13:29 +01:00
int xR; int xC; int xCOffset;
2022-11-20 22:20:02 +01:00
vec4 wTexel; vec4 previous; vec4 final;`;for(let x=0;x<l;x++)d+=`
vec4 xTexelC${x*2};
int xTexelC${x*2}Ready;
vec4 xTexelC${x*2+1};
int xTexelC${x*2+1}Ready;
vec4 xC${x};`;d+=`
2022-11-18 17:13:29 +01:00
for (int r = 0; r < ${c}; r++) {
2022-11-20 22:20:02 +01:00
`;for(let x=0;x<l;x++)d+=`
xTexelC${x*2} = vec4(0.0);
xTexelC${x*2}Ready = 0;
xTexelC${x*2+1} = vec4(0.0);
xTexelC${x*2+1}Ready = 0;
xC${x} = vec4(0.0);`;d+=`
2022-11-18 17:13:29 +01:00
xR = xRCorner + r * dilations[0];
if (xR >=0 && xR < inDims[0]) {
2022-11-20 22:20:02 +01:00
`;for(let x=0;x<(m+1)/2;x++){let b=x*2;if(d+=`
2022-11-18 17:13:29 +01:00
xC = xCCorner + ${b*u};
2022-11-20 22:20:02 +01:00
`,p===1){if(b<l&&(i%2===1?(d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1;
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
2022-11-20 22:20:02 +01:00
`,u===1&&b>0?d+=`
2022-11-18 17:13:29 +01:00
xC${b} = vec4(xTexelC${b-2}.zw, xTexelC${b}.xy);
2022-11-20 22:20:02 +01:00
`:d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1 - 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
previous.zw = vec2(0.0);
}
xC${b} = vec4(previous.zw, xTexelC${b}.xy);
} else {
xC${b} = vec4(0.0, 0.0, xTexelC${b}.xy);
}
2022-11-20 22:20:02 +01:00
`):d+=`
2022-11-18 17:13:29 +01:00
if (xC >= 0 && xC < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
xC${b} = xTexelC${b};
2022-11-20 22:20:02 +01:00
`,b+1<l)){let C=i%2===0?y.nearestLargerEven(u):u;u%2===0&&i%2===1||u%2!==0&&i%2!==1?(d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + imod(pads[1], 2) + ${C};
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
2022-11-20 22:20:02 +01:00
`,u>1?d+=`
2022-11-18 17:13:29 +01:00
xCOffset -= 2;
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
xC${b+1} = vec4(previous.zw, xTexelC${b+1}.xy);
} else {
xC${b+1} = vec4(0.0, 0.0, xTexelC${b+1}.xy);
}
2022-11-20 22:20:02 +01:00
`:d+=`
2022-11-18 17:13:29 +01:00
xC${b+1} = vec4(xTexelC${b}.zw, xTexelC${b+1}.xy);
2022-11-20 22:20:02 +01:00
`):C===1?d+=`
2022-11-18 17:13:29 +01:00
xC${b+1} = xTexelC${b};
2022-11-20 22:20:02 +01:00
`:d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + ${C};
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
xC${b+1} = xTexelC${b+1};
2022-11-20 22:20:02 +01:00
`}}else b<l&&(i%2===1?(d+=`
2022-11-18 17:13:29 +01:00
xCOffset = xC + 1 - strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xC + 1, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xC + 2 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.0);
}
xTexelC${b+1}Ready = 1;
}
xC${b} = vec4(xTexelC${b}.zw, xTexelC${b+1}.zw);
2022-11-20 22:20:02 +01:00
`,b+1<l&&(d+=`
2022-11-18 17:13:29 +01:00
final = vec4(0.0);
xCOffset = xC + 1 + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1]) {
final = getX(batch, xR, xCOffset, d1);
}
xC${b+1} = vec4(xTexelC${b+1}.xy, final.xy);
2022-11-20 22:20:02 +01:00
`)):(d+=`
2022-11-18 17:13:29 +01:00
if(xC >= 0 && xC < inDims[1] && xTexelC${b}Ready == 0) {
xTexelC${b} = getX(batch, xR, xC, d1);
if (xC + 1 >= inDims[1]) {
xTexelC${b}.zw = vec2(0.0);
}
xTexelC${b}Ready = 1;
}
xCOffset = xC + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${b+1}Ready == 0) {
xTexelC${b+1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= inDims[1]) {
xTexelC${b+1}.zw = vec2(0.);
}
xTexelC${b+1}Ready = 1;
}
xC${b} = vec4(
xTexelC${b}.xy, xTexelC${b+1}.xy);
2022-11-20 22:20:02 +01:00
`,b+1<l&&(d+=`
2022-11-18 17:13:29 +01:00
xC${b+1} = vec4(xTexelC${b}.zw, xTexelC${b+1}.zw);
2022-11-20 22:20:02 +01:00
`)));b<l&&(d+=`
2022-11-18 17:13:29 +01:00
wTexel = getW(r, ${b}, d1, q);
dotProd += xC${b} * vec4(wTexel.xz, wTexel.xz);
2022-11-20 22:20:02 +01:00
`,b+1<l&&(d+=`
2022-11-18 17:13:29 +01:00
wTexel = getW(r, ${b+1}, d1, q);
dotProd += xC${b+1} * vec4(wTexel.xz, wTexel.xz);
2022-11-20 22:20:02 +01:00
`))}d+=`
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`,d+=`
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
`;let f="",h="";o&&(n?f=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getPreluActivationWeightsAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:s?f=`vec4 activation(vec4 a) {
2022-11-18 17:13:29 +01:00
vec4 b = getLeakyreluAlphaAtOutCoords();
${o}
2022-11-20 22:20:02 +01:00
}`:f=`vec4 activation(vec4 x) {
2022-11-18 17:13:29 +01:00
${o}
}`,h="result = activation(result);");let g=t?"result += getBiasAtOutCoords();":"";t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),s&&this.variableNames.push("leakyreluAlpha"),this.userCode=`
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
int d2 = coords.w;
int d1 = d2 / ${a};
int q = d2 - d1 * ${a};
int xRCorner = xRCCorner.x;
int xCCorner = xRCCorner.y;
//intialize dotProd with a small epsilon seems to reduce GPU accuracy loss.
vec4 dotProd = vec4(0.000000000000001);
2022-11-20 22:20:02 +01:00
${d}
2022-11-18 17:13:29 +01:00
vec4 result = dotProd - vec4(0.000000000000001);
${g}
${h}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};function cQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dilations:p,dimRoundingMode:u}=o,c=p;c==null&&(c=[1,1]),y.assert(S.eitherStridesOrDilationsAreOne(a,c),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${a} and dilations '${c}'`);let l=S.computeConv2DInfo(n.shape,s.shape,a,c,i,u,!0),m;O().getBool("WEBGL_PACK_DEPTHWISECONV")&&l.strideWidth<=2&&l.outChannels/l.inChannels===1?m=new bc(l):m=new yc(l);let d=[[l.padInfo.top,l.padInfo.left],[l.strideHeight,l.strideWidth],[l.dilationHeight,l.dilationWidth],[l.inHeight,l.inWidth]];return t.runWebGLProgram(m,[n,s],"float32",d)}var oR={kernelName:Qo,backendName:"webgl",kernelFunc:cQ};var Rh=class{constructor(e){this.variableNames=["x","dy"],this.outputShape=e.filterShape;let t=e.strideHeight,o=e.strideWidth,n=e.padInfo.top,s=e.padInfo.left,a=e.outChannels/e.inChannels;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int wR = coords.x;
int wC = coords.y;
int d1 = coords.z;
int dm = coords.w;
int d2 = d1 * ${a} + dm;
float dotProd = 0.0;
// TO DO: Vec4 over the batch size
for (int b = 0; b < ${e.batchSize}; b++) {
for (int yR = 0; yR < ${e.outHeight}; yR++) {
int xR = wR + yR * ${t} - ${n};
if (xR < 0 || xR >= ${e.inHeight}) {
continue;
}
for (int yC = 0; yC < ${e.outWidth}; yC++) {
int xC = wC + yC * ${o} - ${s};
if (xC < 0 || xC >= ${e.inWidth}) {
continue;
}
float dyValue = getDy(b, yR, yC, d2);
float xValue = getX(b, xR, xC, d1);
dotProd += (xValue * dyValue);
}
}
}
setOutput(dotProd);
}
2023-01-06 19:23:06 +01:00
`}},Dh=class{constructor(e){this.variableNames=["dy","W"],this.outputShape=e.inShape;let t=e.filterHeight,o=e.filterWidth,n=e.strideHeight,s=e.strideWidth,a=t-1-e.padInfo.top,i=o-1-e.padInfo.left,p=e.outChannels/e.inChannels;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${a}, ${i});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords[0];
int d1 = coords[3];
ivec2 dyCorner = coords.yz - pads;
int dyRCorner = dyCorner.x;
int dyCCorner = dyCorner.y;
float dotProd = 0.0;
for (int wR = 0; wR < ${t}; wR++) {
float dyR = float(dyRCorner + wR) / ${n}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
int wRPerm = ${t} - 1 - wR;
for (int wC = 0; wC < ${o}; wC++) {
float dyC = float(dyCCorner + wC) / ${s}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
int wCPerm = ${o} - 1 - wC;
// TO DO: Vec4 over the channelMul
for (int dm = 0; dm < ${p}; dm++) {
int d2 = d1 * ${p} + dm;
float xValue = getDy(batch, idyR, idyC, d2);
float wValue = getW(wRPerm, wCPerm, d1, dm);
dotProd += xValue * wValue;
}
}
}
setOutput(dotProd);
}
2023-01-06 19:23:06 +01:00
`}};
/**
 * WebGL kernel: builds depthwise conv info from x.shape and attrs.filterShape
 * and runs program Rh over [x, dy] as float32.
 */
function lQ(r) {
  const { inputs, backend, attrs } = r;
  const { x, dy } = inputs;
  const { strides, dilations, pad, dimRoundingMode, filterShape } = attrs;
  const convInfo = S.computeConv2DInfo(x.shape, filterShape, strides, dilations, pad, dimRoundingMode, true);
  const program = new Rh(convInfo);
  return backend.runWebGLProgram(program, [x, dy], "float32");
}
var nR = { kernelName: yp, backendName: "webgl", kernelFunc: lQ };

/**
 * WebGL kernel: builds depthwise conv info from attrs.inputShape and
 * filter.shape and runs program Dh over [dy, filter] as float32.
 */
function mQ(r) {
  const { inputs, backend, attrs } = r;
  const { dy, filter } = inputs;
  const { strides, dilations, pad, dimRoundingMode, inputShape } = attrs;
  const convInfo = S.computeConv2DInfo(inputShape, filter.shape, strides, dilations, pad, dimRoundingMode, true);
  const program = new Dh(convInfo);
  return backend.runWebGLProgram(program, [dy, filter], "float32");
}
var sR = { kernelName: bp, backendName: "webgl", kernelFunc: mQ };
var Fh=class{constructor(e){this.variableNames=["X"],this.outputShape=[e,e],this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
float val = coords[0] == coords[1] ? getX(coords[0]) : 0.0;
setOutput(val);
}
2023-01-06 19:23:06 +01:00
`}};
/**
 * WebGL kernel: flattens x, runs program Fh to place the values on the
 * diagonal of a [size, size] matrix, then reshapes the result to
 * [...x.shape, ...x.shape]. Both intermediates are disposed before returning.
 */
function dQ(r) {
  const { inputs, backend } = r;
  const { x } = inputs;
  const outShape = [...x.shape, ...x.shape];
  const size = y.sizeFromShape(x.shape);
  const flat = re({ inputs: { x }, backend, attrs: { shape: [size] } });
  const program = new Fh(size);
  const diagonal = backend.runWebGLProgram(program, [flat], flat.dtype);
  const out = re({ inputs: { x: diagonal }, backend, attrs: { shape: outShape } });
  backend.disposeIntermediateTensorInfo(flat);
  backend.disposeIntermediateTensorInfo(diagonal);
  return out;
}
var aR = { kernelName: si, backendName: "webgl", kernelFunc: dQ };
var Oh=class{constructor(e){this.variableNames=["x","W"],this.outputShape=e.outShape;let{inHeight:t,inWidth:o,padInfo:n,strideHeight:s,strideWidth:a,filterHeight:i,filterWidth:p,dilationHeight:u,dilationWidth:c}=e,{top:l,left:m}=n;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 strides = ivec2(${s}, ${a});
const ivec2 pads = ivec2(${l}, ${m});
const float neg_infinity = -3.4e38;
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
int d1 = coords.w;
ivec2 outTopLeftCorner =
coords.yz * strides - pads;
int hBeg = outTopLeftCorner.x;
int wBeg = outTopLeftCorner.y;
float curVal = neg_infinity;
for (int h = 0; h < ${i}; h++) {
int hIn = hBeg + h * ${u};
if (hIn >= 0 && hIn < ${t}) {
for (int w = 0; w < ${p}; w++) {
int wIn = wBeg + w * ${c};
if (wIn >= 0 && wIn < ${o}) {
float xVal = getX(batch, hIn, wIn, d1);
float wVal = getW(h, w, d1);
float val = xVal + wVal;
if (val > curVal) {
curVal = val;
}
}
}
}
}
float result = curVal;
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};
/**
 * WebGL kernel: computes dilation info for x and filter (format "NHWC"),
 * runs program Oh as float32 and reshapes the result to the computed outShape.
 */
function fQ(r) {
  const { inputs, backend, attrs } = r;
  const { x, filter } = inputs;
  const { strides, pad, dilations } = attrs;
  const convInfo = S.computeDilation2DInfo(x.shape, filter.shape, strides, pad, "NHWC", dilations);
  const program = new Oh(convInfo);
  const raw = backend.runWebGLProgram(program, [x, filter], "float32");
  const out = re({ inputs: { x: raw }, backend, attrs: { shape: convInfo.outShape } });
  backend.disposeIntermediateTensorInfo(raw);
  return out;
}
var iR = { kernelName: ai, backendName: "webgl", kernelFunc: fQ };

/**
 * WebGL einsum kernel. Follows the compute path from S.getEinsumComputePath:
 * each operand is permuted/reshaped as required, combined into the running
 * result with El, and reduced with Vu between steps. Every intermediate
 * except the returned tensor is disposed at the end.
 */
function hQ(r) {
  const { inputs, backend, attrs } = r;
  const { equation } = attrs;
  const tensors = inputs;
  const { allDims, summedDims, idDims } = S.decodeEinsumEquation(equation, tensors.length);
  S.checkEinsumDimSizes(allDims.length, idDims, tensors);
  const { path, steps } = S.getEinsumComputePath(summedDims, idDims);
  const stepCount = steps.length;
  let out = null;
  let remainingDims = allDims.length;
  const intermediates = [];

  for (let step = 0; step < stepCount; ++step) {
    for (const termIndex of steps[step]) {
      const { permutationIndices: perm, expandDims: dimsToExpand } =
        S.getEinsumPermutation(remainingDims, idDims[termIndex]);

      let operand;
      if (S.isIdentityPermutation(perm)) {
        operand = tensors[termIndex];
      } else {
        operand = yt({ inputs: { x: tensors[termIndex] }, backend, attrs: { perm } });
        intermediates.push(operand);
      }

      const targetShape = operand.shape.slice();
      for (let k = 0; k < dimsToExpand.length; ++k) {
        targetShape.splice(dimsToExpand[k], 0, 1);
      }
      if (!y.arraysEqual(operand.shape, targetShape)) {
        operand = re({ inputs: { x: operand }, backend, attrs: { shape: targetShape } });
        intermediates.push(operand);
      }

      if (out === null) {
        out = operand;
      } else {
        out = El({ inputs: { a: operand, b: out }, backend });
        intermediates.push(out);
      }
    }

    if (step < stepCount - 1) {
      if (path[step] >= 0) {
        out = Vu({
          inputs: { x: out },
          backend,
          attrs: { axis: path[step] - (allDims.length - remainingDims), keepDims: false },
        });
        intermediates.push(out);
      }
      remainingDims--;
    }
  }

  for (const info of intermediates) {
    if (info !== out) {
      backend.disposeIntermediateTensorInfo(info);
    }
  }
  return out;
}
var uR = { kernelName: ii, backendName: "webgl", kernelFunc: hQ };
var gQ="return (x >= 0.0) ? x : (exp(x) - 1.0);",xQ=`
2022-11-18 17:13:29 +01:00
vec4 result;
result.r = (x.r >= 0.0) ? x.r : (exp(x.r) - 1.0);
result.g = (x.g >= 0.0) ? x.g : (exp(x.g) - 1.0);
result.b = (x.b >= 0.0) ? x.b : (exp(x.b) - 1.0);
result.a = (x.a >= 0.0) ? x.a : (exp(x.a) - 1.0);
return result;
2023-01-06 19:23:06 +01:00
`,yQ=he({opSnippet:gQ,packedOpSnippet:xQ}),pR={kernelName:Jo,backendName:"webgl",kernelFunc:yQ};var bQ="return (b >= 1.0) ? a : a * (b + 1.0);",CQ=`
2022-11-18 17:13:29 +01:00
vec4 bGTEZero = vec4(greaterThanEqual(b, vec4(0.)));
return (bGTEZero * a) + ((vec4(1.0) - bGTEZero) * (a * (b + vec4(1.0))));
2023-01-06 19:23:06 +01:00
`,SQ=r=>{let{inputs:e,backend:t}=r,{dy:o,y:n}=e,s=O().getBool("WEBGL_PACK_BINARY_OPERATIONS")?new _o(CQ,o.shape,n.shape):new io(bQ,o.shape,n.shape);return t.runWebGLProgram(s,[o,n],o.dtype)},cR={kernelName:Nm,backendName:"webgl",kernelFunc:SQ};var wQ=`
2022-11-18 17:13:29 +01:00
return vec4(equal(a, b));
2023-01-06 19:23:06 +01:00
`,IQ="return float(a == b);",vQ=tt({opSnippet:IQ,packedOpSnippet:wQ,dtype:"bool",cpuKernelImpl:H$}),lR={kernelName:en,backendName:"webgl",kernelFunc:vQ};var kQ=`
2022-11-18 17:13:29 +01:00
// Error function is calculated approximately with elementary function.
// See "Handbook of Mathematical Functions with Formulas,
// Graphs, and Mathematical Tables", Abramowitz and Stegun.
2022-11-20 22:20:02 +01:00
float p = ${S.ERF_P};
float a1 = ${S.ERF_A1};
float a2 = ${S.ERF_A2};
float a3 = ${S.ERF_A3};
float a4 = ${S.ERF_A4};
float a5 = ${S.ERF_A5};
2022-11-18 17:13:29 +01:00
float sign = sign(x);
x = abs(x);
float t = 1.0 / (1.0 + p * x);
return sign * (1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x));
2023-01-06 19:23:06 +01:00
`,NQ=he({opSnippet:kQ}),mR={kernelName:da,backendName:"webgl",kernelFunc:NQ};var TQ=$o+`
2022-11-18 17:13:29 +01:00
return exp(x);
2023-01-06 19:23:06 +01:00
`,_Q=`
2022-11-18 17:13:29 +01:00
vec4 result = exp(x);
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
2023-01-06 19:23:06 +01:00
`,Ww=he({opSnippet:TQ,packedOpSnippet:_Q,cpuKernelImpl:K$,dtype:"float32"}),dR={kernelName:tn,backendName:"webgl",kernelFunc:Ww};function Ph(r){let{inputs:e,attrs:t,backend:o}=r,{dim:n}=t,{input:s}=e,a=s.shape.length,i=s.shape.slice(),p=n;return n<0&&(y.assert(-(a+1)<=n,()=>`Axis must be in the interval [${-(a+1)}, ${a}]`),p=a+n+1),i.splice(p,0,1),re({inputs:{x:s},backend:o,attrs:{shape:i}})}var fR={kernelName:Ss,backendName:"webgl",kernelFunc:Ph};var hR="return exp(x) - 1.0;",$Q=he({opSnippet:hR,packedOpSnippet:hR,cpuKernelImpl:q$}),gR={kernelName:fa,backendName:"webgl",kernelFunc:$Q};var Dl=class{constructor(e,t,o){this.variableNames=["real","imag"];let n=t[1];this.outputShape=t;let s=o?`2.0 * ${Math.PI}`:`-2.0 * ${Math.PI}`,a=o?`${n}.0`:"1.0",i;if(e==="real")i="return real * expR - imag * expI;";else if(e==="imag")i="return real * expI + imag * expR;";else throw new Error(`FFT component must be either "real" or "imag", got ${e}.`);this.userCode=`
2022-11-18 17:13:29 +01:00
const float exponentMultiplier = ${s};
float unaryOpComplex(float real, float expR, float imag, float expI) {
${i}
}
float mulMatDFT(int batch, int index) {
float indexRatio = float(index) / float(${n});
float exponentMultiplierTimesIndexRatio =
exponentMultiplier * indexRatio;
float result = 0.0;
for (int i = 0; i < ${n}; i++) {
// x = (-2|2 * PI / N) * index * i;
float x = exponentMultiplierTimesIndexRatio * float(i);
float expR = cos(x);
float expI = sin(x);
float real = getReal(batch, i);
float imag = getImag(batch, i);
result +=
unaryOpComplex(real, expR, imag, expI) / ${a};
}
return result;
}
void main() {
ivec2 coords = getOutputCoords();
setOutput(mulMatDFT(coords[0], coords[1]));
}
2023-01-06 19:23:06 +01:00
`}};function Mh(r,e,t){let o=t.texData.get(r.dataId),n=y.sizeFromShape(r.shape),s=r.shape[r.shape.length-1],a=n/s,i=re({inputs:{x:r},backend:t,attrs:{shape:[a,s]}}),p=i.shape,u=new Dl("real",p,e),c=new Dl("imag",p,e),l=[{dataId:o.complexTensorInfos.real.dataId,dtype:o.complexTensorInfos.real.dtype,shape:p},{dataId:o.complexTensorInfos.imag.dataId,dtype:o.complexTensorInfos.imag.dtype,shape:p}],m=t.runWebGLProgram(u,l,"float32"),d=t.runWebGLProgram(c,l,"float32"),f=Ar({inputs:{real:m,imag:d},backend:t});t.disposeIntermediateTensorInfo(m),t.disposeIntermediateTensorInfo(d);let h=re({inputs:{x:f},backend:t,attrs:{shape:r.shape}});return t.disposeIntermediateTensorInfo(i),t.disposeIntermediateTensorInfo(f),h}function EQ(r){let{inputs:e,backend:t}=r,{input:o}=e;return Mh(o,!1,t)}var xR={kernelName:ui,backendName:"webgl",kernelFunc:EQ};var Lh=class{constructor(e,t){this.outputShape=[],this.customUniforms=[{name:"value",type:"float"}],this.variableNames=["x"],this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
// Input can be obtained from uniform value.
setOutput(value);
}
2023-01-06 19:23:06 +01:00
`}};
/**
 * WebGL kernel: creates a tensor of attrs.shape filled with attrs.value.
 * The dtype falls back to y.inferDtype(value) when attrs.dtype is falsy.
 * String tensors are filled on the CPU; all other dtypes run program Lh with
 * the value passed as a custom uniform.
 */
function qa(r) {
  const { backend, attrs } = r;
  const { shape, value } = attrs;
  const dtype = attrs.dtype || y.inferDtype(value);
  if (dtype === "string") {
    const values = y.getArrayFromDType(dtype, y.sizeFromShape(shape));
    values.fill(value);
    return backend.makeTensorInfo(shape, dtype, values);
  }
  const program = new Lh(shape, value);
  const uniforms = [[value]];
  return backend.runWebGLProgram(program, [], dtype, uniforms);
}
var yR = { kernelName: ws, backendName: "webgl", kernelFunc: qa };
var Bh=class{constructor(e){this.variableNames=["Image"],this.outputShape=[];let t=e[2];this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int x = coords[2];
int coordX = ${t} - x - 1;
float outputValue;
if(coordX >= 0 && coordX < ${t}) {
outputValue = getImage(coords[0], coords[1], coordX, coords[3]);
} else {
outputValue = getImage(coords[0], coords[1], coords[2], coords[3]);
}
setOutput(outputValue);
}
2023-01-06 19:23:06 +01:00
`}};
// Kernel config whose kernelFunc runs program Bh over the image, keeping its dtype.
var bR = {
  kernelName: rn,
  backendName: "webgl",
  kernelFunc: ({ inputs, backend }) => {
    const { image } = inputs;
    const webglBackend = backend;
    const program = new Bh(image.shape);
    return webglBackend.runWebGLProgram(program, [image], image.dtype);
  },
};

// floor(x); the same snippet serves the unpacked and packed programs.
var CR = "return floor(x);";
var AQ = he({ opSnippet: CR, packedOpSnippet: CR, cpuKernelImpl: j$ });
var SR = { kernelName: on, backendName: "webgl", kernelFunc: AQ };
var RQ=`
2022-11-18 17:13:29 +01:00
float s = sign(a) * sign(b);
int ia = round(a);
int ib = round(b);
if (ib != 0) {
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
return float(idiv(ia, ib, s));
} else {
return NAN;
}
2023-01-06 19:23:06 +01:00
`,DQ=`
2022-11-18 17:13:29 +01:00
ivec4 ia = round(a);
ivec4 ib = round(b);
bvec4 cond = notEqual(ib, ivec4(0));
ivec4 result = ivec4(0);
vec4 s = sign(a) * sign(b);
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
if (cond[0]) {
result[0] = idiv(ia[0], ib[0], s[0]);
}
if (cond[1]) {
result[1] = idiv(ia[1], ib[1], s[1]);
}
if (cond[2]) {
result[2] = idiv(ia[2], ib[2], s[2]);
}
if (cond[3]) {
result[3] = idiv(ia[3], ib[3], s[3]);
}
return vec4(result);
2023-01-06 19:23:06 +01:00
`,FQ=tt({opSnippet:RQ,packedOpSnippet:DQ,dtype:"int32"}),wR={kernelName:nn,backendName:"webgl",kernelFunc:FQ};var Vh=class{constructor(e){this.variableNames=["A"];let t=wt(),[o,n]=e;this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
int texR = coords[0];
int texC = coords[1];
int depth = coords[2];
vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${n}.0, ${o}.0);
vec4 values = ${t.texture2D}(A, uv);
float value;
if (depth == 0) {
value = values.r;
} else if (depth == 1) {
value = values.g;
} else if (depth == 2) {
value = values.b;
} else if (depth == 3) {
value = values.a;
}
setOutput(floor(value * 255.0 + 0.5));
}
2023-01-06 19:23:06 +01:00
`}};var zh=class{constructor(e){this.variableNames=["A"],this.packedInputs=!1,this.packedOutput=!0;let t=wt(),[o,n]=e;this.outputShape=e,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec3 coords = getOutputCoords();
int texR = coords[0];
int texC = coords[1];
int depth = coords[2];
vec4 result = vec4(0.);
for(int row=0; row<=1; row++) {
for(int col=0; col<=1; col++) {
texC = coords[1] + row;
depth = coords[2] + col;
vec2 uv = (vec2(texC, texR) + halfCR) /
vec2(${n}.0, ${o}.0);
vec4 values = ${t.texture2D}(A, uv);
float value;
if (depth == 0) {
value = values.r;
} else if (depth == 1) {
value = values.g;
} else if (depth == 2) {
value = values.b;
} else if (depth == 3) {
value = values.a;
}
result[row * 2 + col] = floor(value * 255.0 + 0.5);
}
}
${t.output} = result;
}
2023-01-06 19:23:06 +01:00
`}};var IR={kernelName:Zi,backendName:"webgl",kernelFunc:OQ},Cc,Uw=O().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");/* OQ: builds an int32 [height, width, numChannels] tensor from attrs-described pixels. Video/image elements are first drawn onto the cached 2D canvas context Cc (re-created when the CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU flag value changes). The pixels are uploaded to a temporary texture tagged ur.PIXELS, decoded by program zh (WEBGL_PACK on) or Vh, and the temporary data is disposed before returning. */function OQ(r){let{inputs:e,backend:t,attrs:o}=r,{pixels:n}=e,{numChannels:s}=o,a=typeof HTMLVideoElement!="undefined"&&n instanceof HTMLVideoElement,i=typeof HTMLImageElement!="undefined"&&n instanceof HTMLImageElement,[p,u]=a?[n.videoWidth,n.videoHeight]:[n.width,n.height],c=[u,p],l=[u,p,s];if(i||a){let h=O().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");(Cc==null||h!==Uw)&&(Uw=h,Cc=document.createElement("canvas").getContext("2d",{willReadFrequently:Uw})),Cc.canvas.width=p,Cc.canvas.height=u,Cc.drawImage(n,0,0,p,u),n=Cc.canvas}let m=t.makeTensorInfo(c,"int32");t.texData.get(m.dataId).usage=ur.PIXELS,t.gpgpu.uploadPixelDataToTexture(t.getTexture(m.dataId),n);let d=O().getBool("WEBGL_PACK")?new zh(l):new Vh(l),f=t.runWebGLProgram(d,[m],"int32");return t.disposeData(m.dataId),f}/* PQ: fused conv2d kernel (optional bias, prelu weights, leakyrelu alpha). Strategy, in order: Ih for a 1x1 filter with unit strides/dilations and SAME/VALID padding; program xc when strideWidth <= 2, layout is channelsLast and WEBGL_EXP_CONV is on; vh when WEBGL_CONV_IM2COL is on; otherwise program gc. The helper _ assembles the program inputs, reshaping rank-1 bias/prelu tensors to [C,1,1] for NCHW. The result is reshaped to convInfo.outShape and every tensor collected in b is disposed. */function PQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dataFormat:c,dilations:l,dimRoundingMode:m,activation:d,leakyreluAlpha:f}=o,h=S.convertConv2DDataFormat(c),g=S.computeConv2DInfo(n.shape,s.shape,p,l,u,m,!1,h),x,b=[],C=a!=null,w=i!=null,k=d==="leakyrelu",_=()=>{let A=[n,s],R=(D,P)=>{if(P==="NCHW"&&D.shape.length===1&&D.shape[0]!==1){let M=re({inputs:{x:D},backend:t,attrs:{shape:[D.shape[0],1,1]}});return b.push(M),M}return D};if(C&&A.push(R(a,c)),w&&A.push(R(i,c)),k){let D=t.makeTensorInfo([],"float32",y.createScalarValue(f,"float32"));A.push(D),b.push(D)}return A};if(g.filterHeight===1&&g.filterWidth===1&&g.dilationHeight===1&&g.dilationWidth===1&&g.strideHeight===1&&g.strideWidth===1&&(g.padInfo.type==="SAME"||g.padInfo.type==="VALID"))x=Ih({x:n,filter:s,convInfo:g,backend:t,bias:a,activation:d,preluActivationWeights:i,leakyreluAlpha:f});else if(g.strideWidth<=2&&h==="channelsLast"&&O().getBool("WEBGL_EXP_CONV")){let A=d?Ha(d,!0):null,R=new
xc(g,C,A,w,k),D=[[g.padInfo.top,g.padInfo.left],[g.strideHeight,g.strideWidth],[g.dilationHeight,g.dilationWidth],[g.inHeight,g.inWidth]],P=_();x=t.runWebGLProgram(R,P,"float32",D)}else if(O().getBool("WEBGL_CONV_IM2COL"))x=vh({x:n,filter:s,convInfo:g,backend:t,bias:a,activation:d,preluActivationWeights:i,leakyreluAlpha:f});else{let A=d?Ha(d,!1):null,R=new gc(g,C,A,w,k),D=_();x=t.runWebGLProgram(R,D,"float32")}let E=re({inputs:{x},backend:t,attrs:{shape:g.outShape}});return b.push(x),b.forEach(A=>t.disposeIntermediateTensorInfo(A)),E}var vR={kernelName:go,backendName:"webgl",kernelFunc:PQ};/* MQ: fused depthwise conv2d kernel. Dilations default to [1,1]; asserts that either strides or dilations are one. Uses the packed program bc when WEBGL_PACK_DEPTHWISECONV is on, strideWidth <= 2 and the channel multiplier is 1, otherwise yc. Bias, prelu weights and a scalar leakyrelu alpha are appended to the inputs when present; the temporary alpha scalar is disposed after the program runs. */function MQ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dilations:c,dimRoundingMode:l,activation:m,leakyreluAlpha:d}=o,f=[],h=c;h==null&&(h=[1,1]),y.assert(S.eitherStridesOrDilationsAreOne(p,h),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${p} and dilations '${h}'`);let g=S.computeConv2DInfo(n.shape,s.shape,p,h,u,l,!0),x=O().getBool("WEBGL_PACK_DEPTHWISECONV")&&g.strideWidth<=2&&g.outChannels/g.inChannels===1,b=m?Ha(m,x):null,C=[n,s],w=a!=null,k=i!=null,_=m==="leakyrelu";if(w&&C.push(a),k&&C.push(i),_){let D=t.makeTensorInfo([],"float32",y.createScalarValue(d,"float32"));C.push(D),f.push(D)}let E;x?E=new bc(g,w,b,k,_):E=new yc(g,w,b,k,_);let A=[[g.padInfo.top,g.padInfo.left],[g.strideHeight,g.strideWidth],[g.dilationHeight,g.dilationWidth],[g.inHeight,g.inWidth]],R=t.runWebGLProgram(E,C,"float32",A);return f.forEach(D=>t.disposeIntermediateTensorInfo(D)),R}var kR={kernelName:xo,backendName:"webgl",kernelFunc:MQ};var Wh=class{constructor(e,t,o,n){this.sliceDim=e,this.strides=t,this.paramsShape=n,this.variableNames=["x","indices"],this.outputShape=o;let s=$e(o.length),a=`
2022-11-18 17:13:29 +01:00
int index;`;for(let i=0;i<this.sliceDim;i++)a+=`
index = round(getIndices(coords[0], ${i}));
out_of_bounds = out_of_bounds || index < 0;
out_of_bounds = out_of_bounds || index >= ${this.paramsShape[i]};
flattenIndex += index * ${this.strides[i]};`;this.userCode=`
void main() {
${s} coords = getOutputCoords();
int flattenIndex = 0;
bool out_of_bounds = false;
${a}
setOutput(out_of_bounds ? 0.0 : getX(flattenIndex, coords[1]));
}
2023-01-06 19:23:06 +01:00
`}};function LQ(r){let{inputs:e,backend:t}=r,{params:o,indices:n}=e,s=n.shape,a=s[s.length-1],i=y.sizeFromShape(o.shape),[p,u,c,l]=S.prepareAndValidate(o,n),m=re({inputs:{x:n},backend:t,attrs:{shape:[u,a]}}),d=re({inputs:{x:o},backend:t,attrs:{shape:[y.sizeFromShape(o.shape)/c,c]}});if(t.shouldExecuteOnCPU([o,n])||o.dtype==="string"){let x=t.readSync(n.dataId),b=t.bufferSync(o),C=X$(x,b,o.dtype,u,a,c,l,o.shape,i);return t.makeTensorInfo(p,o.dtype,C.values)}let f=new Wh(a,l,[u,c],o.shape),h=t.runWebGLProgram(f,[d,m],d.dtype),g=re({inputs:{x:h},backend:t,attrs:{shape:p}});return t.disposeIntermediateTensorInfo(m),t.disposeIntermediateTensorInfo(d),t.disposeIntermediateTensorInfo(h),g}var NR={kernelName:an,backendName:"webgl",kernelFunc:LQ};var Uh=class{constructor(e,t){this.variableNames=["A","indices"],this.outputShape=t,this.rank=t.length;let o=$e(this.rank),n=BQ(e,2);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${o} resRC = getOutputCoords();
int index = int(getIndices(resRC.x, resRC.z));
float inBounds = (index >= 0) && (index < ${e[2]}) ? 1.0 : 0.0;
setOutput(inBounds * getA(${n}));
}
2023-01-06 19:23:06 +01:00
`}};function BQ(r,e){let t=["resRC.x","resRC.y","resRC.z","resRC.w"],o=[];for(let n=0;n<r.length;n++)n===2?o.push("index"):o.push(`${t[n]}`);return o.join()}function Gw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,indices:s}=e,{axis:a,batchDims:i}=o,p=y.parseAxisParam(a,n.shape)[0];if(O().get("DEBUG")){let b=t.readSync(s.dataId),C=n.shape[p];for(let w=0;w<b.length;++w){let k=b[w];y.assert(k<=C-1&&k>=0,()=>`GatherV2: the index value ${k} is not in [0, ${C-1}]`)}}let u=S.segment_util.collectGatherOpShapeInfo(n,s,p,i),c=y.sizeFromShape(s.shape),l=[],m=re({inputs:{x:n},backend:t,attrs:{shape:[u.batchSize,u.outerSize,u.dimSize,u.sliceSize]}}),d=re({inputs:{x:s},backend:t,attrs:{shape:[u.batchSize,c/u.batchSize]}});l.push(m),l.push(d);let f=[u.batchSize,u.outerSize,c/u.batchSize,u.sliceSize];if(t.shouldExecuteOnCPU([n,s])||n.dtype==="string"){let b=t.bufferSync(d),C=t.bufferSync(m),w=Y$(C,b,f);return l.forEach(k=>t.disposeIntermediateTensorInfo(k)),t.makeTensorInfo(u.outputShape,w.dtype,w.values)}let h=new Uh(m.shape,f),g=t.runWebGLProgram(h,[m,d],m.dtype);l.push(g);let x=re({inputs:{x:g},backend:t,attrs:{shape:u.outputShape}});return l.forEach(b=>t.disposeIntermediateTensorInfo(b)),x}var TR={kernelName:Is,backendName:"webgl",kernelFunc:Gw};var VQ="return float(a > b);",zQ=`
2022-11-18 17:13:29 +01:00
return vec4(greaterThan(a, b));
2023-01-06 19:23:06 +01:00
`,WQ=tt({opSnippet:VQ,packedOpSnippet:zQ,cpuKernelImpl:Q$,dtype:"bool"}),_R={kernelName:un,backendName:"webgl",kernelFunc:WQ};var UQ="return float(a >= b);",GQ=`
2022-11-18 17:13:29 +01:00
return vec4(greaterThanEqual(a, b));
2023-01-06 19:23:06 +01:00
`,HQ=tt({opSnippet:UQ,packedOpSnippet:GQ,dtype:"bool",cpuKernelImpl:Z$}),$R={kernelName:pn,backendName:"webgl",kernelFunc:HQ};function KQ(r){let{inputs:e,backend:t}=r,{input:o}=e;return Mh(o,!0,t)}var ER={kernelName:pi,backendName:"webgl",kernelFunc:KQ};var qQ="return float(!isnan(x) && !isinf(x));",jQ=he({opSnippet:qQ,dtype:"bool"}),AR={kernelName:ha,backendName:"webgl",kernelFunc:jQ};var XQ="return float(isinf(x));",YQ=he({opSnippet:XQ,dtype:"bool"}),RR={kernelName:ga,backendName:"webgl",kernelFunc:YQ};var QQ="return float(isnan(x));",ZQ=he({opSnippet:QQ,dtype:"bool"}),DR={kernelName:cn,backendName:"webgl",kernelFunc:ZQ};var JQ="return float(a < b);",e7=`
2022-11-18 17:13:29 +01:00
return vec4(lessThan(a, b));
2023-01-06 19:23:06 +01:00
`,t7=tt({opSnippet:JQ,packedOpSnippet:e7,cpuKernelImpl:J$,dtype:"bool"}),FR={kernelName:mn,backendName:"webgl",kernelFunc:t7};var r7="return float(a <= b);",o7=`
2022-11-18 17:13:29 +01:00
return vec4(lessThanEqual(a, b));
2023-01-06 19:23:06 +01:00
`,
  // n7: kernel function tt builds from the "a <= b" snippets; OR registers it.
  n7 = tt({ opSnippet: r7, packedOpSnippet: o7, cpuKernelImpl: eE, dtype: "bool" }),
  OR = { kernelName: dn, backendName: "webgl", kernelFunc: n7 };

/**
 * Kernel function that computes its values on the host with tE(start, stop,
 * num) and wraps them in a 1-D float32 tensor info of matching length.
 */
function s7(r) {
  const { backend, attrs } = r;
  const { start, stop, num } = attrs;
  const values = tE(start, stop, num);
  return backend.makeTensorInfo([values.length], "float32", values);
}
var PR = { kernelName: li, backendName: "webgl", kernelFunc: s7 };

// a7: shared prefix $o followed by the log snippet that starts on the next line.
var a7=$o+`
2022-11-18 17:13:29 +01:00
return x < 0.0 ? 0./0. : log(x);
2023-01-06 19:23:06 +01:00
`,i7=`
2022-11-18 17:13:29 +01:00
vec4 result = log(x);
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : (x.r < 0.0 ? 0./0. : result.r);
result.g = isNaN.g ? x.g : (x.g < 0.0 ? 0./0. : result.g);
result.b = isNaN.b ? x.b : (x.b < 0.0 ? 0./0. : result.b);
result.a = isNaN.a ? x.a : (x.a < 0.0 ? 0./0. : result.a);
return result;
2023-01-06 19:23:06 +01:00
`,u7=he({opSnippet:a7,packedOpSnippet:i7,cpuKernelImpl:rE}),MR={kernelName:fn,backendName:"webgl",kernelFunc:u7};var p7=$o+`
2022-11-18 17:13:29 +01:00
return log(1.0 + x);
2023-01-06 19:23:06 +01:00
`,c7=he({opSnippet:p7}),LR={kernelName:xa,backendName:"webgl",kernelFunc:c7};var l7="return float(a >= 1.0 && b >= 1.0);",m7=`
2022-11-18 17:13:29 +01:00
return vec4(
vec4(greaterThanEqual(a, vec4(1.0))) *
vec4(greaterThanEqual(b, vec4(1.0))));
2023-01-06 19:23:06 +01:00
`,d7=tt({opSnippet:l7,packedOpSnippet:m7,dtype:"bool"}),BR={kernelName:hn,backendName:"webgl",kernelFunc:d7};var f7="return float(!(x >= 1.0));",h7=he({opSnippet:f7}),VR={kernelName:gn,backendName:"webgl",kernelFunc:h7};var g7="return float(a >= 1.0 || b >= 1.0);",x7=`
2022-11-18 17:13:29 +01:00
return min(
vec4(greaterThanEqual(a, vec4(1.0))) +
vec4(greaterThanEqual(b, vec4(1.0))),
vec4(1.0));
2023-01-06 19:23:06 +01:00
`,y7=tt({opSnippet:g7,packedOpSnippet:x7,dtype:"bool"}),zR={kernelName:xn,backendName:"webgl",kernelFunc:y7};var Gh=class{constructor(e,t,o,n,s){this.variableNames=["x"],this.outputShape=[];let a=t,i=e[3]-1;this.outputShape=e;let p,u=`float(${o}) + float(${n}) * sum`;s===.5?p=`inversesqrt(${u})`:s===1?p=`1.0/(${u})`:p=`exp(log(${u}) * float(-${s}));`,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int r = coords[1];
int c = coords[2];
int d = coords[3];
float x = getX(b, r, c, d);
float sum = 0.0;
for (int j = -${a}; j <= ${a}; j++) {
int idx = d + j;
if (idx >= 0 && idx <= ${i}) {
float z = getX(b, r, c, idx);
sum += z * z;
}
}
float val = x * ${p};
setOutput(val);
}
2022-11-20 22:20:02 +01:00
`}};var Hh=class{constructor(e,t,o,n,s){this.variableNames=["x"],this.outputShape=[],this.packedInputs=!0,this.packedOutput=!0;let a=t,i=e[3]-1;this.outputShape=e;let p,u=`float(${o}) + float(${n}) * sum`;s===.5?p=`inversesqrt(${u})`:s===1?p=`1.0/(${u})`:p=`exp(log(${u}) * float(-${s}));`,this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords.x;
int r = coords.y;
int c = coords.z;
int d = coords.w;
bool hasNextCol = d < ${this.outputShape[3]};
bool hasNextRow = c < ${this.outputShape[2]};
vec4 sum = vec4(0.);
vec4 xFragAtOutputCoords = getX(b, r, c, d);
vec4 xAtOutputCoords = vec4(
getChannel(xFragAtOutputCoords, vec2(c, d)),
hasNextCol ?
getChannel(xFragAtOutputCoords, vec2(c, d + 1)) : 0.0,
hasNextRow ?
getChannel(xFragAtOutputCoords , vec2(c + 1, d)) : 0.0,
(hasNextRow && hasNextCol) ?
getChannel(xFragAtOutputCoords, vec2(c + 1, d + 1)) : 0.0
);
int firstChannel = d - ${a};
vec2 cache = vec2(0.);
if(firstChannel >= 0){
vec4 firstChannelFrag = getX(b, r, c, firstChannel);
cache.x = getChannel(firstChannelFrag, vec2(c, firstChannel));
if(hasNextRow){
cache.y = getChannel(firstChannelFrag, vec2(c + 1, firstChannel));
}
}
ivec2 depth = ivec2(d, d + 1);
for (int j = - ${a}; j <= ${a}; j++) {
ivec2 idx = depth + j;
bvec2 aboveLowerBound = greaterThanEqual(idx, ivec2(0));
bvec2 belowUpperBound = lessThanEqual(idx, ivec2(${i}));
bool depthInRange = aboveLowerBound.x && belowUpperBound.x;
bool depthPlusOneInRange = aboveLowerBound.y && belowUpperBound.y;
if(depthInRange || depthPlusOneInRange){
vec4 z = vec4(0.);
vec4 xFragAtCurrentDepth;
z.xz = cache.xy;
if(depthPlusOneInRange && hasNextCol){
xFragAtCurrentDepth = idx.y != d ?
getX(b, r, c, idx.y) : xFragAtOutputCoords;
z.y = getChannel(xFragAtCurrentDepth, vec2(c, idx.y));
if(hasNextRow){
z.w = getChannel(xFragAtCurrentDepth, vec2(c + 1, idx.y));
}
}
cache.xy = z.yw;
sum += z * z;
}
}
vec4 result = xAtOutputCoords * ${p};
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};
/**
 * LRN kernel function: instantiates the packed program Hh when
 * WEBGL_PACK_NORMALIZATION is set, otherwise Gh, with
 * (x.shape, depthRadius, bias, alpha, beta), and runs it on x.
 */
var b7 = (r) => {
    const { inputs, backend, attrs } = r;
    const { x } = inputs;
    const { depthRadius, bias, alpha, beta } = attrs;
    const ProgramClass = O().getBool("WEBGL_PACK_NORMALIZATION") ? Hh : Gh;
    const program = new ProgramClass(x.shape, depthRadius, bias, alpha, beta);
    return backend.runWebGLProgram(program, [x], x.dtype);
  },
  WR = { kernelName: mi, backendName: "webgl", kernelFunc: b7 };
/**
 * LRN gradient shader program over inputImage, outputImage and dy.
 * constructor(e, t, o, n, s) receives the input shape, depth radius, bias,
 * alpha and beta. Parameter names are kept as-is because the shader template
 * that follows interpolates them.
 */
var Kh = class {
  constructor(e, t, o, n, s) {
    this.variableNames = ["inputImage", "outputImage", "dy"];
    this.outputShape = [];
    this.outputShape = e;
    this.depth = e[3];
    this.depthRadius = t;
    this.bias = o;
    this.alpha = n;
    this.beta = s;
    this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
  // Accumulates, over every depth position d whose window contains this
  // output channel, the contribution of dy(b, r, c, d) to the input gradient.
  ivec4 coords = getOutputCoords();
  int b = coords[0];
  int r = coords[1];
  int c = coords[2];
  float result = 0.0;
  for (int d = 0; d < ${this.depth}; ++d) {
    int depthBegin = int(max(0.0, float(d - ${t})));
    int depthEnd = int(min(float(${this.depth}),
        float(d + ${t} + 1)));
    // Loop bounds must be constant, so iterate the full depth and skip or
    // break outside [depthBegin, depthEnd).
    const int MIN_DEPTH_BEGIN = 0;
    const int MAX_DEPTH_END = ${this.depth};
    float norm = 0.0;
    for (int k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; ++k) {
      if (k < depthBegin){
        continue;
      }
      else if (k >= depthBegin && k < depthEnd) {
        norm += getInputImage(b, r, c, k) * getInputImage(b, r, c, k);
      }
      else {
        break;
      }
    }
    norm = float(${n}) * norm + float(${o});
    for(int k = MIN_DEPTH_BEGIN; k < MAX_DEPTH_END; ++k){
      if (k < depthBegin){
        continue;
      }
      else if (k >= depthBegin && k < depthEnd){
        float dyi = -2.0 * float(${n})
          * float(${s})
          * getInputImage(b, r, c, k) * getOutputImage(b, r, c, d)
          / norm;
        if (k == d) {
          // beta is wrapped in float(): an integral beta is interpolated as an
          // int literal, and GLSL ES has no implicit int-to-float conversion.
          dyi += pow(norm, -1.0 * float(${s}));
        }
        if (k == coords[3]) {
          dyi *= getDy(b, r, c, d);
          result += dyi;
        }
      }
      else {
        break;
      }
    }
  }
  setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};var C7=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n,y:s,dy:a}=e,{depthRadius:i,bias:p,alpha:u,beta:c}=o,l=new Kh(n.shape,i,p,u,c);return t.runWebGLProgram(l,[n,s,a],n.dtype)},UR={kernelName:Tm,backendName:"webgl",kernelFunc:C7};function GR(r,e,t,o){let n=y.sizeFromShape(e),a=y.sizeFromShape(r.shape)/n,i=re({inputs:{x:r},attrs:{shape:[a,n]},backend:o}),p=Ur(i,r.dtype,"max",o),u=re({inputs:{x:p},attrs:{shape:t},backend:o});return o.disposeIntermediateTensorInfo(i),o.disposeIntermediateTensorInfo(p),u}function Hw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{reductionIndices:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=S.getAxesPermutation(u,i),l=c!=null,m=t.shouldExecuteOnCPU([n]),d=n;if(l){if(m){let C=t.texData.get(d.dataId).values,w=new Array(i);for(let E=0;E<w.length;E++)w[E]=n.shape[c[E]];let k=Bu(C,n.shape,n.dtype,c,w);d=t.makeTensorInfo(w,n.dtype);let _=t.texData.get(d.dataId);_.values=k}else d=zi(n,c,t);u=S.getInnerMostAxes(u.length,i)}S.assertAxesAreInnerMostDims("max",u,i);let[f,h]=S.computeOutAndReduceShapes(d.shape,u),g=f;a&&(g=S.expandShapeToKeepDim(f,p));let x;if(m){let C=t.texData.get(d.dataId).values,w=oE(C,y.sizeFromShape(h),g,n.dtype);x=t.makeTensorInfo(g,n.dtype);let k=t.texData.get(x.dataId);k.values=w}else x=GR(d,h,g,t);return l&&t.disposeIntermediateTensorInfo(d),x}var HR={kernelName:yn,backendName:"webgl",kernelFunc:Hw};var S7=dc+`
2022-11-18 17:13:29 +01:00
return max(a, b);
2023-01-06 19:23:06 +01:00
`,w7=`
2022-11-18 17:13:29 +01:00
vec4 result = vec4(max(a, b));
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
2023-01-06 19:23:06 +01:00
`+Js+`
2022-11-18 17:13:29 +01:00
return result;
2023-01-06 19:23:06 +01:00
`,I7=tt({opSnippet:S7,packedOpSnippet:w7,cpuKernelImpl:nE}),KR={kernelName:bn,backendName:"webgl",kernelFunc:I7};function v7(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e;us(n,"maxPool");let{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1;y.assert(S.eitherStridesOrDilationsAreOne(a,u),()=>`Error in maxPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${u}'`);let c=S.computePool2DInfo(n.shape,s,a,u,i,p);if(c.filterWidth===1&&c.filterHeight===1&&y.arraysEqual(c.inShape,c.outShape))return At({inputs:{x:n},backend:t});let l=new cs(c,"max",!1);return t.runWebGLProgram(l,[n],n.dtype)}var qR={kernelName:Cn,backendName:"webgl",kernelFunc:v7};function k7(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{filterSize:s,strides:a,pad:i,dataFormat:p,dimRoundingMode:u}=o,c=[1,1,1],l=S.computePool3DInfo(n.shape,s,a,c,i,u,p),m=new Wi(l,"max",!1);return t.runWebGLProgram(m,[n],n.dtype)}var jR={kernelName:Cp,backendName:"webgl",kernelFunc:k7};var qh=class{constructor(e){this.variableNames=["dy","maxPos"],this.outputShape=e.inShape;let t=e.strideHeight,o=e.strideWidth,n=e.dilationHeight,s=e.effectiveFilterHeight,a=e.effectiveFilterWidth,i=s-1-e.padInfo.top,p=a-1-e.padInfo.left,u=s*a-1;this.userCode=`
2022-11-18 17:13:29 +01:00
const ivec2 pads = ivec2(${i}, ${p});
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 dyRCCorner = coords.yz - pads;
int dyRCorner = dyRCCorner.x;
int dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wR = 0; wR < ${s};
wR += ${n}) {
float dyR = float(dyRCorner + wR) / ${t}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 || fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${a}; wC++) {
float dyC = float(dyCCorner + wC) / ${o}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(b, idyR, idyC, d);
int maxPosValue = ${u} - int(getMaxPos(b, idyR, idyC, d));
// Get the current value, check it against the value from the
// position matrix.
int curPosValue = wR * ${a} + wC;
float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
dotProd += dyValue * mask;
}
}
setOutput(dotProd);
}
2022-11-20 22:20:02 +01:00
`}},
  /**
   * 3-D max-pool backprop shader program over dy and maxPos; constructor(e)
   * receives the 3-D pool info. Local names are kept as-is because the shader
   * template that follows interpolates them.
   */
  jh = class {
    constructor(e) {
      this.variableNames = ["dy", "maxPos"];
      this.outputShape = e.inShape;
      let t = e.strideDepth;
      let o = e.strideHeight;
      let n = e.strideWidth;
      let s = e.dilationDepth;
      let a = e.dilationHeight;
      let i = e.dilationWidth;
      let p = e.effectiveFilterDepth;
      let u = e.effectiveFilterHeight;
      let c = e.effectiveFilterWidth;
      let l = p - 1 - e.padInfo.front;
      let m = u - 1 - e.padInfo.top;
      let d = c - 1 - e.padInfo.left;
      let f = p * u * c - 1;
      this.userCode=`
const ivec3 pads = ivec3(${l}, ${m}, ${d});
2022-11-18 17:13:29 +01:00
void main() {
ivec5 coords = getOutputCoords();
int batch = coords.x;
int ch = coords.u;
ivec3 dyCorner = ivec3(coords.y, coords.z, coords.w) - pads;
int dyDCorner = dyCorner.x;
int dyRCorner = dyCorner.y;
int dyCCorner = dyCorner.z;
// Convolve dy(?, ?, ?, ch) with pos mask(:, :, :, d) to get
// dx(xD, xR, xC, ch).
// ? = to be determined. : = across all values in that axis.
float dotProd = 0.0;
for (int wD = 0; wD < ${p};
wD += ${s}) {
float dyD = float(dyDCorner + wD) / ${t}.0;
if (dyD < 0.0 || dyD >= ${e.outDepth}.0 || fract(dyD) > 0.0) {
continue;
}
int idyD = int(dyD);
for (int wR = 0; wR < ${u};
wR += ${a}) {
float dyR = float(dyRCorner + wR) / ${o}.0;
if (dyR < 0.0 || dyR >= ${e.outHeight}.0 ||
fract(dyR) > 0.0) {
continue;
}
int idyR = int(dyR);
for (int wC = 0; wC < ${c};
wC += ${i}) {
float dyC = float(dyCCorner + wC) / ${n}.0;
if (dyC < 0.0 || dyC >= ${e.outWidth}.0 ||
fract(dyC) > 0.0) {
continue;
}
int idyC = int(dyC);
float dyValue = getDy(batch, idyD, idyR, idyC, ch);
2022-11-20 22:20:02 +01:00
int maxPosValue = ${f} -
2022-11-18 17:13:29 +01:00
int(getMaxPos(batch, idyD, idyR, idyC, ch));
// Get the current value, check it against the value from the
// position matrix.
int curPosValue =
wD * ${u} * ${c} +
wR * ${c} + wC;
float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
dotProd += dyValue * mask;
}
}
}
setOutput(dotProd);
}
2023-01-06 19:23:06 +01:00
`}};function N7(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s}=e,a=s,{filterSize:i,strides:p,pad:u,dimRoundingMode:c}=o,l=[1,1,1],m=S.computePool3DInfo(a.shape,i,p,l,u,c),d=new Wi(m,"max",!0),f=t.runWebGLProgram(d,[a],a.dtype),h=new jh(m),g=t.runWebGLProgram(h,[n,f],a.dtype);return t.disposeIntermediateTensorInfo(f),g}var XR={kernelName:$m,backendName:"webgl",kernelFunc:N7};function T7(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,input:s,output:a}=e,i=s;us([s,a],"maxPoolGrad");let{filterSize:p,strides:u,pad:c,dimRoundingMode:l}=o,m=S.computePool2DInfo(i.shape,p,u,1,c,l),d=!0,f=new cs(m,"max",d),h=t.runWebGLProgram(f,[i],i.dtype),g=new qh(m),x=t.runWebGLProgram(g,[n,h],i.dtype);return t.disposeIntermediateTensorInfo(h),x}var YR={kernelName:_m,backendName:"webgl",kernelFunc:T7};function QR(r,e,t,o){let n=new cs(t,"max",!1),s=o.runWebGLProgram(n,[r],"float32");n=new cs(t,"max",!0,!0,e);let a=o.runWebGLProgram(n,[r],"float32");return[s,a]}var ZR={kernelName:Sp,backendName:"webgl",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o}=r,{filterSize:n,strides:s,pad:a,includeBatchInIndex:i}=e,p=t;y.assert(o.shape.length===4,()=>`Error in maxPool: input must be rank 4 but got rank ${o.shape.length}.`);let u=[1,1];y.assert(S.eitherStridesOrDilationsAreOne(s,u),()=>`Error in maxPool: Either strides or dilations must be 1. 
Got strides ${s} and dilations '${u}'`);let c=S.computePool2DInfo(o.shape,n,s,u,a),[l,m]=QR(o,i,c,p);return[l,m]}};function JR(r,e,t,o){let n=y.sizeFromShape(e),a=y.sizeFromShape(r.shape)/n,i=re({inputs:{x:r},attrs:{shape:[a,n]},backend:o}),p=Ur(i,"float32","mean",o),u=re({inputs:{x:p},attrs:{shape:t},backend:o});return o.disposeIntermediateTensorInfo(i),o.disposeIntermediateTensorInfo(p),u}var eD={kernelName:Sn,backendName:"webgl",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o}=r,{keepDims:n,axis:s}=e,a=t,i=o.shape.length,p=y.parseAxisParam(s,o.shape),u=p,c=S.getAxesPermutation(u,i),l=c!=null,m=a.shouldExecuteOnCPU([o]),d=[],f=o;if(l){if(m){let w=a.texData.get(f.dataId).values,k=new Array(i);for(let A=0;A<k.length;A++)k[A]=o.shape[c[A]];let _=Bu(w,o.shape,o.dtype,c,k);f=a.makeTensorInfo(k,o.dtype);let E=a.texData.get(f.dataId);E.values=_}else f=zi(o,c,a);d.push(f),u=S.getInnerMostAxes(u.length,i)}S.assertAxesAreInnerMostDims("sum",u,i);let[h,g]=S.computeOutAndReduceShapes(f.shape,u),x=h;n&&(x=S.expandShapeToKeepDim(h,p));let b=JR(f,g,x,a);for(let C of d)a.disposeIntermediateTensorInfo(C);return b}};function _7(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o,i=n.shape.length,p=y.parseAxisParam(s,n.shape),u=p,c=S.getAxesPermutation(u,i),l=n;c!=null&&(l=yt({inputs:{x:n},backend:t,attrs:{perm:c}}),u=S.getInnerMostAxes(u.length,n.shape.length)),S.assertAxesAreInnerMostDims("min",u,i);let[m,d]=S.computeOutAndReduceShapes(l.shape,u),f=y.sizeFromShape(d),h=re({inputs:{x:l},backend:t,attrs:{shape:[-1,f]}}),g=Ur(h,h.dtype,"min",t),x;if(a){let b=S.expandShapeToKeepDim(m,p);x=re({inputs:{x:g},backend:t,attrs:{shape:b}})}else x=re({inputs:{x:g},backend:t,attrs:{shape:m}});return t.disposeIntermediateTensorInfo(h),t.disposeIntermediateTensorInfo(g),c!=null&&t.disposeIntermediateTensorInfo(l),x}var tD={kernelName:wn,backendName:"webgl",kernelFunc:_7};var $7=dc+`
2022-11-18 17:13:29 +01:00
return min(a, b);
2023-01-06 19:23:06 +01:00
`,E7=`
2022-11-18 17:13:29 +01:00
vec4 result = vec4(min(a, b));
bvec4 isNaNA = isnan(a);
bvec4 isNaNB = isnan(b);
bvec4 isNaN = bvec4(isNaNA.x || isNaNB.x, isNaNA.y || isNaNB.y, isNaNA.z || isNaNB.z, isNaNA.w || isNaNB.w);
2023-01-06 19:23:06 +01:00
`+Js+`
2022-11-18 17:13:29 +01:00
return result;
2023-01-06 19:23:06 +01:00
`,
  // A7: kernel function tt builds from the min(a, b) snippets; rD registers it.
  A7 = tt({ opSnippet: $7, packedOpSnippet: E7, cpuKernelImpl: sE }),
  rD = { kernelName: In, backendName: "webgl", kernelFunc: A7 };
/**
 * Mirror-pad shader program. constructor(e, t, o) receives the input shape,
 * the per-dimension [before, after] paddings and the mode; "reflect" uses an
 * offset of 0, any other mode an offset of 1. Local names are kept as-is
 * because the shader templates that follow interpolate them.
 */
var Xh = class {
  constructor(e, t, o) {
    this.variableNames = ["x"];
    this.outputShape = t.map((padding, dim) => padding[0] + e[dim] + padding[1]);
    let n = e.length;
    let s = $e(n);
    let a = t.map((padding) => padding[0]).join(",");
    let i = t.map((padding, dim) => padding[0] + e[dim]).join(",");
    let p = ["coords[0]", "coords[1]", "coords[2]", "coords[3]"].slice(0, n);
    let u = o === "reflect" ? 0 : 1;
    if (n === 1) {
      this.userCode=`
2022-11-18 17:13:29 +01:00
int start = ${a};
int end = ${i};
void main() {
int outC = getOutputCoords();
if (outC < start) {
outC = start * 2 - outC - ${u};
} else if(outC >= end) {
outC = (end - 1) * 2 - outC + ${u};
}
setOutput(getX(outC - start));
}
`;return}this.userCode=`
${s} start = ${s}(${a});
${s} end = ${s}(${i});
void main() {
${s} outC = getOutputCoords();
for (int i = 0; i < ${n}; i++) {
if (outC[i] < start[i]) {
outC[i] = start[i] * 2 - outC[i] - ${u};
} else if(outC[i] >= end[i]) {
outC[i] = (end[i] - 1) * 2 - outC[i] + ${u};
}
}
${s} coords = outC - start;
setOutput(getX(${p}));
}
2023-01-06 19:23:06 +01:00
`}};var Yh=class{constructor(e,t,o){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=t.map((f,h)=>f[0]+e[h]+f[1]);let n=e.length,s=$e(n),a=t.map(f=>f[0]).join(","),i=t.map((f,h)=>f[0]+e[h]).join(","),p=Et("rc",n),u=Et("source",n),c=`${p[n-1]} < ${this.outputShape[n-1]}`,l=n===1?"source":`vec2(${u.slice(-2).join()})`,m=o==="reflect"?0:1,d="";if(n===1){let f=`
2022-11-18 17:13:29 +01:00
${s} source = rc;
if (source < start) {
source = start * 2 - source - ${m};
} else if (source >= end) {
source = (end - 1) * 2 - source + ${m};
}
source -= start;
2022-11-20 22:20:02 +01:00
`;d=`
2022-11-18 17:13:29 +01:00
${s} rc = outputLoc;
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
result[0] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
result[1] = getChannel(getX(${u.join()}), ${l});
}
2022-11-20 22:20:02 +01:00
`}else{let f=`
2022-11-18 17:13:29 +01:00
${s} source = rc;
${s} lt = ${s}(lessThan(source, start));
${s} gte = ${s}(greaterThanEqual(source, end));
${s} orig = 1 - (lt + gte);
source = orig * source +
lt * (start * 2 - source - ${m}) +
gte * ((end - 1) * 2 - source + ${m});
source -= start;
2022-11-20 22:20:02 +01:00
`;d=`
2022-11-18 17:13:29 +01:00
${s} rc = outputLoc;
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
result[0] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
result[1] = getChannel(getX(${u.join()}), ${l});
}
rc = outputLoc;
${p[n-2]} += 1;
if(${p[n-2]} < ${this.outputShape[n-2]}) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
result[2] = getChannel(getX(${u.join()}), ${l});
${p[n-1]} += 1;
if(${c}) {
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
result[3] = getChannel(getX(${u.join()}), ${l});
}
}
`}this.userCode=`
const ${s} start = ${s}(${a});
const ${s} end = ${s}(${i});
void main() {
${s} outputLoc = getOutputCoords();
vec4 result = vec4(0.);
2022-11-20 22:20:02 +01:00
${d}
2022-11-18 17:13:29 +01:00
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};var R7=({inputs:r,backend:e,attrs:t})=>{let{x:o}=r,{paddings:n,mode:s}=t,a=O().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new Yh(o.shape,n,s):new Xh(o.shape,n,s);return e.runWebGLProgram(a,[o],o.dtype)},oD={kernelName:vn,backendName:"webgl",kernelFunc:R7};var D7=`if (b == 0.0) return NAN;
return mod(a, b);`,F7=`
2022-11-18 17:13:29 +01:00
vec4 result = mod(a, b);
bvec4 isNaN = equal(b, vec4(0.0));
2023-01-06 19:23:06 +01:00
`+Js+`
2022-11-18 17:13:29 +01:00
return result;
2023-01-06 19:23:06 +01:00
`,
  // O7: kernel function tt builds from the mod snippets; nD registers it.
  O7 = tt({ opSnippet: D7, packedOpSnippet: F7 }),
  nD = { kernelName: ya, backendName: "webgl", kernelFunc: O7 };
/**
 * Sampling shader program over probs with a float "seed" custom uniform.
 * constructor(e, t, o) sets the output shape to [e, o]; t is interpolated
 * into the shader template that follows as the loop bound t - 1, so the
 * parameter names are kept as-is.
 */
var Qh = class {
  constructor(e, t, o) {
    this.variableNames = ["probs"];
    this.customUniforms = [{ name: "seed", type: "float" }];
    this.outputShape = [e, o];
    this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
float r = random(seed);
float cdf = 0.0;
for (int i = 0; i < ${t-1}; i++) {
cdf += getProbs(batch, i);
if (r < cdf) {
setOutput(float(i));
return;
}
}
// If no other event happened, last event happened.
setOutput(float(${t-1}));
}
2023-01-06 19:23:06 +01:00
`}};var P7=`
2022-11-18 17:13:29 +01:00
if (a == b) {
return 1.0;
};
2023-01-06 19:23:06 +01:00
return a / b;`,M7=`
2022-11-18 17:13:29 +01:00
// vec4 one = vec4(equal(a, b));
// return one + (vec4(1.0) - one) * a / b;
vec4 result = a / b;
if(a.x == b.x) {
result.x = 1.;
}
if(a.y == b.y) {
result.y = 1.;
}
if(a.z == b.z) {
result.z = 1.;
}
if(a.w == b.w) {
result.w = 1.;
}
return result;
2023-01-06 19:23:06 +01:00
`,Kw=tt({opSnippet:P7,packedOpSnippet:M7,checkOutOfBounds:!0}),sD={kernelName:Zo,backendName:"webgl",kernelFunc:Kw};var aD="return a - b;",qw=tt({opSnippet:aD,packedOpSnippet:aD,supportsComplex:!0,cpuKernelImpl:kE}),iD={kernelName:Yn,backendName:"webgl",kernelFunc:qw};function jw(r){let{inputs:e,backend:t,attrs:o}=r,{logits:n}=e,{dim:s}=o,a=y.parseAxisParam([s],n.shape),i=Hw({inputs:{x:n},backend:t,attrs:{reductionIndices:a,keepDims:!1}}),p=S.expandShapeToKeepDim(i.shape,a),u=re({inputs:{x:i},backend:t,attrs:{shape:p}}),c=qw({inputs:{a:n,b:u},backend:t}),l=Ww({inputs:{x:c},backend:t}),m=Vu({inputs:{x:l},backend:t,attrs:{axis:a,keepDims:!1}}),d=re({inputs:{x:m},backend:t,attrs:{shape:p}}),f=Kw({inputs:{a:l,b:d},backend:t});return t.disposeIntermediateTensorInfo(i),t.disposeIntermediateTensorInfo(u),t.disposeIntermediateTensorInfo(c),t.disposeIntermediateTensorInfo(l),t.disposeIntermediateTensorInfo(m),t.disposeIntermediateTensorInfo(d),f}var uD={kernelName:qn,backendName:"webgl",kernelFunc:jw};function L7(r){let{inputs:e,backend:t,attrs:o}=r,{logits:n}=e,{numSamples:s,seed:a,normalized:i}=o,p=i?n:jw({inputs:{logits:n},backend:t,attrs:{dim:n.shape.length-1}}),u=p.shape[0],c=p.shape[1],l=new Qh(u,c,s),m=[[a]],d=t.runWebGLProgram(l,[p],"int32",m);return i||t.disposeIntermediateTensorInfo(p),d}var pD={kernelName:wp,backendName:"webgl",kernelFunc:L7};var B7=zt+`
2022-11-18 17:13:29 +01:00
return -x;
2023-01-06 19:23:06 +01:00
`,V7=`
2022-11-18 17:13:29 +01:00
vec4 result = -x;
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
2023-01-06 19:23:06 +01:00
`;
/**
 * Negation kernel function. Runs on the host via iE when the backend prefers
 * CPU execution; otherwise runs the packed program Er with snippet V7 when
 * WEBGL_PACK_UNARY_OPERATIONS is set, or Jt with snippet B7.
 */
function z7(r) {
  const { inputs, backend } = r;
  const { x } = inputs;
  if (backend.shouldExecuteOnCPU([x])) {
    const xData = backend.texData.get(x.dataId);
    const [outValues, outShape] = iE(xData.values, x.shape, x.dtype);
    return backend.makeTensorInfo(outShape, x.dtype, outValues);
  }
  const program = O().getBool("WEBGL_PACK_UNARY_OPERATIONS")
    ? new Er(x.shape, V7)
    : new Jt(x.shape, B7);
  return backend.runWebGLProgram(program, [x], x.dtype);
}
var cD = { kernelName: vs, backendName: "webgl", kernelFunc: z7 };

var W7 = Vt.nonMaxSuppressionV3Impl;
/**
 * Kernel function wrapping Vt.nonMaxSuppressionV3Impl. Warns, then reads
 * boxes and scores back synchronously and returns the selected indices as a
 * 1-D int32 tensor info.
 */
function U7(r) {
  S.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");
  const { inputs, backend, attrs } = r;
  const { boxes, scores } = inputs;
  const { maxOutputSize, iouThreshold, scoreThreshold } = attrs;
  const boxesData = backend.readSync(boxes.dataId);
  const scoresData = backend.readSync(scores.dataId);
  const { selectedIndices } = W7(boxesData, scoresData, maxOutputSize, iouThreshold, scoreThreshold);
  return backend.makeTensorInfo([selectedIndices.length], "int32", new Int32Array(selectedIndices));
}
var lD = { kernelName: Tn, backendName: "webgl", kernelFunc: U7 };

var G7 = Vt.nonMaxSuppressionV4Impl;
/**
 * Kernel function wrapping Vt.nonMaxSuppressionV4Impl. Returns
 * [selectedIndices (1-D int32), validOutputs (scalar int32)].
 */
function H7(r) {
  S.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");
  const { inputs, backend, attrs } = r;
  const { boxes, scores } = inputs;
  const { maxOutputSize, iouThreshold, scoreThreshold, padToMaxOutputSize } = attrs;
  const boxesData = backend.readSync(boxes.dataId);
  const scoresData = backend.readSync(scores.dataId);
  const { selectedIndices, validOutputs } = G7(boxesData, scoresData, maxOutputSize, iouThreshold, scoreThreshold, padToMaxOutputSize);
  return [
    backend.makeTensorInfo([selectedIndices.length], "int32", new Int32Array(selectedIndices)),
    backend.makeTensorInfo([], "int32", new Int32Array([validOutputs])),
  ];
}
var mD = { kernelName: ba, backendName: "webgl", kernelFunc: H7 };

var K7 = Vt.nonMaxSuppressionV5Impl;
/**
 * Kernel function wrapping Vt.nonMaxSuppressionV5Impl. Returns
 * [selectedIndices (1-D int32), selectedScores (1-D float32)].
 */
function q7(r) {
  // The warning must be a single-line literal: a raw line break inside a
  // double-quoted string is a syntax error.
  S.warn("tf.nonMaxSuppression() in webgl locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");
  const { inputs, backend, attrs } = r;
  const { boxes, scores } = inputs;
  const { maxOutputSize, iouThreshold, scoreThreshold, softNmsSigma } = attrs;
  const boxesData = backend.readSync(boxes.dataId);
  const scoresData = backend.readSync(scores.dataId);
  const { selectedIndices, selectedScores } = K7(boxesData, scoresData, maxOutputSize, iouThreshold, scoreThreshold, softNmsSigma);
  return [
    backend.makeTensorInfo([selectedIndices.length], "int32", new Int32Array(selectedIndices)),
    backend.makeTensorInfo([selectedScores.length], "float32", new Float32Array(selectedScores)),
  ];
}
var dD = { kernelName: _n, backendName: "webgl", kernelFunc: q7 };

/**
 * Shader program over indices with output shape [e, t]. The shader template
 * that follows interpolates o and n, so the parameter names are kept as-is.
 */
var Zh = class {
  constructor(e, t, o, n) {
    this.variableNames = ["indices"];
    this.outputShape = [e, t];
    this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec2 coords = getOutputCoords();
int index = round(getIndices(coords.x));
setOutput(mix(float(${n}), float(${o}),
float(index == coords.y)));
}
2023-01-06 19:23:06 +01:00
`}};var j7=r=>{let{inputs:e,backend:t,attrs:o}=r,{indices:n}=e,{dtype:s,depth:a,onValue:i,offValue:p}=o,u=y.sizeFromShape(n.shape),c=new Zh(u,a,i,p),l=re({inputs:{x:n},backend:t,attrs:{shape:[u]}}),m=t.runWebGLProgram(c,[l],s);t.disposeIntermediateTensorInfo(l);let d=[...n.shape,a],f=re({inputs:{x:m},backend:t,attrs:{shape:d}});return t.disposeIntermediateTensorInfo(m),f},fD={kernelName:$n,backendName:"webgl",kernelFunc:j7};function Fl(r){let{inputs:e,backend:t}=r,{x:o}=e;if(o.dtype==="complex64"){let n=Ka({inputs:{input:o},backend:t}),s=Fl({inputs:{x:n},backend:t}),a=Wu({inputs:{input:o},backend:t}),i=Fl({inputs:{x:a},backend:t}),p=Ar({inputs:{real:s,imag:i},backend:t});return t.disposeIntermediateTensorInfo(n),t.disposeIntermediateTensorInfo(s),t.disposeIntermediateTensorInfo(a),t.disposeIntermediateTensorInfo(i),p}else return qa({attrs:{shape:o.shape,dtype:o.dtype,value:o.dtype==="string"?"":0},backend:t})}var hD={kernelName:Os,backendName:"webgl",kernelFunc:Fl};function gD(r){let{inputs:e,backend:t}=r,{x:o}=e;if(o.dtype==="string")throw new Error("onesLike is not supported under string dtype");if(o.dtype==="complex64"){let n=Ka({inputs:{input:o},backend:t}),s=gD({inputs:{x:n},backend:t}),a=Wu({inputs:{input:o},backend:t}),i=Fl({inputs:{x:a},backend:t}),p=Ar({inputs:{real:s,imag:i},backend:t});return t.disposeIntermediateTensorInfo(n),t.disposeIntermediateTensorInfo(s),t.disposeIntermediateTensorInfo(a),t.disposeIntermediateTensorInfo(i),p}else return qa({attrs:{shape:o.shape,dtype:o.dtype,value:1},backend:t})}var xD={kernelName:ks,backendName:"webgl",kernelFunc:gD};function X7(r){let{inputs:e,backend:t,attrs:o}=r,{axis:n}=o;if(e.length===1)return Ph({inputs:{input:e[0]},backend:t,attrs:{dim:n}});let s=e[0].shape,a=e[0].dtype;e.forEach(c=>{y.assertShapesMatch(s,c.shape,"All tensors passed to stack must have matching shapes"),y.assert(a===c.dtype,()=>"All tensors passed to stack must have matching dtypes")});let i=[],p=e.map(c=>{let 
l=Ph({inputs:{input:c},backend:t,attrs:{dim:n}});return i.push(l),l}),u=zw({inputs:p,backend:t,attrs:{axis:n}});return i.forEach(c=>t.disposeIntermediateTensorInfo(c)),u}var yD={kernelName:Ns,backendName:"webgl",kernelFunc:X7};var Jh=class{constructor(e,t,o){this.variableNames=["x"],this.customUniforms=[{name:"value",type:"float"}],this.outputShape=t.map((u,c)=>u[0]+e[c]+u[1]);let n=e.length,s=$e(n),a=t.map(u=>u[0]).join(","),i=t.map((u,c)=>u[0]+e[c]).join(","),p=["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,n);if(n===1){this.userCode=`
2022-11-18 17:13:29 +01:00
int start = ${a};
int end = ${i};
void main() {
int outC = getOutputCoords();
if (outC < start || outC >= end) {
setOutput(value);
} else {
setOutput(getX(outC - start));
}
}
`;return}this.userCode=`
${s} start = ${s}(${a});
${s} end = ${s}(${i});
void main() {
${s} outC = getOutputCoords();
if (any(lessThan(outC, start)) || any(greaterThanEqual(outC, end))) {
setOutput(value);
} else {
${s} coords = outC - start;
setOutput(getX(${p}));
}
}
2023-01-06 19:23:06 +01:00
`}};
/*
 * Packed counterpart of Jh (packedInputs and packedOutput are both set).
 * constructor(e, t, o): e is the input shape, t the per-dimension
 * [before, after] pairs; o is not read inside the constructor.
 * outputShape is computed exactly as in Jh. The constructor then assembles
 * per-channel shader code: the loop further down emits 2 channel blocks for
 * rank 1 and 4 otherwise, each writing float(value) when rc lies outside
 * [start, end) and a getChannel(getX(...)) lookup at rc - start otherwise.
 * The array m holds the coordinate-advance preamble for each channel.
 */
var eg=class{constructor(e,t,o){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0,this.customUniforms=[{name:"value",type:"float"}],this.outputShape=t.map((h,g)=>h[0]+e[g]+h[1]);let n=e.length,s=$e(n),a=t.map(h=>h[0]).join(","),i=t.map((h,g)=>h[0]+e[g]).join(","),p=Et("rc",n),u=Et("source",n),c=`${p[n-1]} < ${this.outputShape[n-1]}`,l=n===1?"source":`vec2(${u.slice(-2).join()})`,m=[`${s} rc = outputLoc;`,`${p[n-1]} += 1;
2022-11-18 17:13:29 +01:00
if(${c}) {
`,n===1?"":`}
rc = outputLoc;
${p[n-2]} += 1;
if(${p[n-2]} < ${this.outputShape[n-2]}) {`,n===1?"":` ${p[n-1]} += 1;
2022-11-20 22:20:02 +01:00
if(${c}) {`],d=n===1?"rc < start || rc >= end":"any(lessThan(rc, start)) || any(greaterThanEqual(rc, end))",f="";for(let h=0,g=n===1?2:4;h<g;h++)f+=`
2022-11-18 17:13:29 +01:00
${m[h]}
2022-11-20 22:20:02 +01:00
if (${d}) {
2022-11-18 17:13:29 +01:00
result[${h}] = float(value);
} else {
${s} source = rc - start;
result[${h}] = getChannel(getX(${u.join()}), ${l});
}
2022-11-20 22:20:02 +01:00
`;f+=n===1?"} ":"}}",this.userCode=`
2022-11-18 17:13:29 +01:00
const ${s} start = ${s}(${a});
const ${s} end = ${s}(${i});
void main() {
${s} outputLoc = getOutputCoords();
vec4 result = vec4(0.);
2022-11-20 22:20:02 +01:00
${f}
2022-11-18 17:13:29 +01:00
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`}};var Xw=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{paddings:s,constantValue:a}=o;if(y.sizeFromShape(n.shape)===0){let u=s.map((c,l)=>c[0]+n.shape[l]+c[1]);return qa({backend:t,attrs:{shape:u,value:a,dtype:n.dtype}})}let i=O().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new eg(n.shape,s,a):new Jh(n.shape,s,a),p=[[a]];return t.runWebGLProgram(i,[n],n.dtype,p)},bD={kernelName:En,backendName:"webgl",kernelFunc:Xw};var Y7=`
2022-11-18 17:13:29 +01:00
if(a < 0.0 && floor(b) < b){
return NAN;
}
if (b == 0.0) {
return 1.0;
}
return (round(mod(b, 2.0)) != 1) ?
pow(abs(a), b) : sign(a) * pow(abs(a), b);
2023-01-06 19:23:06 +01:00
`,Q7=`
2022-11-18 17:13:29 +01:00
// isModRound1 has 1 for components with round(mod(b, 2.0)) == 1, 0 otherwise.
vec4 isModRound1 = vec4(equal(round(mod(b, 2.0)), ivec4(1)));
vec4 multiplier = sign(a) * isModRound1 + (vec4(1.0) - isModRound1);
vec4 result = multiplier * pow(abs(a), b);
// Ensure that a^0 = 1, including 0^0 = 1 as this correspond to TF and JS
bvec4 isExpZero = equal(b, vec4(0.0));
result.r = isExpZero.r ? 1.0 : result.r;
result.g = isExpZero.g ? 1.0 : result.g;
result.b = isExpZero.b ? 1.0 : result.b;
result.a = isExpZero.a ? 1.0 : result.a;
bvec4 isNaN1 = lessThan(a, vec4(0.0));
bvec4 isNaN2 = lessThan(floor(b), b);
bvec4 isNaN = bvec4(isNaN1.x && isNaN2.x, isNaN1.y && isNaN2.y, isNaN1.z && isNaN2.z, isNaN1.w && isNaN2.w);
2023-01-06 19:23:06 +01:00
`+Js+`
2022-11-18 17:13:29 +01:00
return result;
2023-01-06 19:23:06 +01:00
`,Z7=tt({opSnippet:Y7,packedOpSnippet:Q7}),CD={kernelName:An,backendName:"webgl",kernelFunc:Z7};function J7(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o,i=n.shape.length,p=[],u=y.parseAxisParam(s,n.shape),c=u,l=S.getAxesPermutation(c,i),m=n;l!=null&&(m=yt({inputs:{x:n},backend:t,attrs:{perm:l}}),c=S.getInnerMostAxes(c.length,i),p.push(m)),S.assertAxesAreInnerMostDims("prod",c,i);let d;if(t.shouldExecuteOnCPU([m])){let f=t.texData.get(m.dataId).values,{outVals:h,outShape:g,outDtype:x}=pE(m.shape,m.dtype,f,c);d=t.makeTensorInfo(g,x,h)}else{let[f,h]=S.computeOutAndReduceShapes(m.shape,c),g=y.sizeFromShape(h),x=re({inputs:{x:m},backend:t,attrs:{shape:[-1,g]}}),b=Ta(n.dtype),C=Ur(x,b,"prod",t);d=re({inputs:{x:C},backend:t,attrs:{shape:f}}),p.push(x),p.push(C)}if(a){p.push(d);let f=S.expandShapeToKeepDim(d.shape,u);d=re({inputs:{x:d},backend:t,attrs:{shape:f}})}return p.forEach(f=>t.disposeIntermediateTensorInfo(f)),d}var SD={kernelName:Dn,backendName:"webgl",kernelFunc:J7};function eZ(r){let{inputs:e,backend:t,attrs:o}=r,{paramsNestedSplits:n,paramsDenseValues:s,indices:a}=e,{outputRaggedRank:i}=o,p=n.map(x=>t.readSync(x.dataId)),u=n.map(x=>x.shape),c=t.readSync(s.dataId),l=t.readSync(a.dataId),[m,d,f]=cE(p,u,c,s.shape,s.dtype,l,a.shape,i),h=m.map(x=>t.makeTensorInfo([x.length],"int32",x)),g=t.makeTensorInfo(f,s.dtype,d);return h.concat([g])}var wD={kernelName:Ip,backendName:"webgl",kernelFunc:eZ};function tZ(r){let{inputs:e,backend:t}=r,{starts:o,limits:n,deltas:s}=e,a=t.readSync(o.dataId),i=t.readSync(n.dataId),p=t.readSync(s.dataId),[u,c]=lE(a,o.shape,o.dtype,i,n.shape,p,s.shape),l=t.makeTensorInfo([u.length],"int32",u),m=t.makeTensorInfo([c.length],o.dtype,c);return[l,m]}var ID={kernelName:vp,backendName:"webgl",kernelFunc:tZ};function 
rZ(r){let{inputs:e,backend:t,attrs:o}=r,{shape:n,values:s,defaultValue:a,rowPartitionTensors:i}=e,{rowPartitionTypes:p}=o,u=t.readSync(n.dataId),c=t.readSync(s.dataId),l=t.readSync(a.dataId),m=i.map(g=>t.readSync(g.dataId)),d=i.map(g=>g.shape),[f,h]=mE(u,n.shape,c,s.shape,s.dtype,l,a.shape,m,d,p);return t.makeTensorInfo(f,s.dtype,h)}var vD={kernelName:kp,backendName:"webgl",kernelFunc:rZ};var Yw=r=>{let{backend:e,attrs:t}=r,{start:o,stop:n,step:s,dtype:a}=t,i=dE(o,n,s,a);return e.makeTensorInfo([i.length],a,i)},kD={kernelName:Ts,backendName:"webgl",kernelFunc:Yw};var oZ="return 1.0 / x;",nZ=he({opSnippet:oZ}),ND={kernelName:Fn,backendName:"webgl",kernelFunc:nZ};var sZ=zt+`
2022-11-18 17:13:29 +01:00
return (x < 0.0) ? 0.0 : x;
2023-01-06 19:23:06 +01:00
`,aZ=`
2022-11-18 17:13:29 +01:00
vec4 result = x * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
2023-01-06 19:23:06 +01:00
`,iZ=he({opSnippet:sZ,packedOpSnippet:aZ}),TD={kernelName:On,backendName:"webgl",kernelFunc:iZ};var uZ=zt+`
2022-11-18 17:13:29 +01:00
return (x < 0.0) ? 0.0 : min(6.0, x);
2023-01-06 19:23:06 +01:00
`,pZ=`
2022-11-18 17:13:29 +01:00
vec4 result = min(x, vec4(6.)) * vec4(greaterThanEqual(x, vec4(0.0)));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
2023-01-06 19:23:06 +01:00
`,cZ=he({opSnippet:uZ,packedOpSnippet:pZ}),_D={kernelName:Ln,backendName:"webgl",kernelFunc:cZ};var tg=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m;s?m="(vec2(yRC) + vec2(0.5)) * effectiveInputOverOutputRatioRC - vec2(0.5)":m="vec2(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
2022-11-18 17:13:29 +01:00
const vec2 effectiveInputOverOutputRatioRC = vec2(
${c[0]/l[0]},
${c[1]/l[1]});
const vec2 inputShapeRC = vec2(${i}.0, ${p}.0);
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 yRC = coords.yz;
// Fractional source index.
vec2 sourceFracIndexRC = ${m};
// Compute the four integer indices.
ivec2 sourceFloorRC = ivec2(max(sourceFracIndexRC, vec2(0.0)));
ivec2 sourceCeilRC = ivec2(
min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)));
float topLeft = getA(b, sourceFloorRC.x, sourceFloorRC.y, d);
float bottomLeft = getA(b, sourceCeilRC.x, sourceFloorRC.y, d);
float topRight = getA(b, sourceFloorRC.x, sourceCeilRC.y, d);
float bottomRight = getA(b, sourceCeilRC.x, sourceCeilRC.y, d);
vec2 fracRC = sourceFracIndexRC - vec2(sourceFloorRC);
float top = topLeft + (topRight - topLeft) * fracRC.y;
float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y;
float newValue = top + (bottom - top) * fracRC.x;
setOutput(newValue);
}
2022-11-20 22:20:02 +01:00
`}};var rg=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m;s?m="(vec3(yRC) + vec3(0.5)) * effectiveInputOverOutputRatioRC - vec3(0.5)":m="vec3(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
2022-11-18 17:13:29 +01:00
const vec3 effectiveInputOverOutputRatioRC = vec3(
${c[0]/l[0]},
${c[1]/l[1]},
${c[1]/l[1]});
const vec3 inputShapeRC = vec3(${i}.0, ${p}.0,
${p}.0);
float getAValue(int b, int r, int c, int d) {
return getChannel(getA(b, r, c, d), vec2(c, d));
}
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
// Calculate values for next column in yRC.z.
ivec3 yRC = coords.yzz + ivec3(0, 0, 1);
// Fractional source index.
vec3 sourceFracIndexRC = ${m};
// Compute the four integer indices.
ivec3 sourceFloorRC = ivec3(max(sourceFracIndexRC, vec3(0.0)));
ivec3 sourceCeilRC = ivec3(
min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)));
// Should we calculate next column and row elements in 2x2 packed cell.
bool hasNextCol = d < ${u-1};
bool hasNextRow = coords.z < ${o-1};
// In parallel, construct four corners for all four components in
// packed 2x2 cell.
vec4 topLeft = vec4(
getAValue(b, sourceFloorRC.x, sourceFloorRC.y, d),
hasNextCol ? getAValue(b, sourceFloorRC.x, sourceFloorRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceFloorRC.x, sourceFloorRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceFloorRC.x, sourceFloorRC.z, d + 1) : 0.0);
vec4 bottomLeft = vec4(
getAValue(b, sourceCeilRC.x, sourceFloorRC.y, d),
hasNextCol ? getAValue(b, sourceCeilRC.x, sourceFloorRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceCeilRC.x, sourceFloorRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceCeilRC.x, sourceFloorRC.z, d + 1) : 0.0);
vec4 topRight = vec4(
getAValue(b, sourceFloorRC.x, sourceCeilRC.y, d),
hasNextCol ? getAValue(b, sourceFloorRC.x, sourceCeilRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceFloorRC.x, sourceCeilRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceFloorRC.x, sourceCeilRC.z, d + 1) : 0.0);
vec4 bottomRight = vec4(
getAValue(b, sourceCeilRC.x, sourceCeilRC.y, d),
hasNextCol ? getAValue(b, sourceCeilRC.x, sourceCeilRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceCeilRC.x, sourceCeilRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceCeilRC.x, sourceCeilRC.z, d + 1) : 0.0);
vec3 fracRC = sourceFracIndexRC - vec3(sourceFloorRC);
vec4 top = mix(topLeft, topRight, fracRC.yyzz);
vec4 bottom = mix(bottomLeft, bottomRight, fracRC.yyzz);
vec4 newValue = mix(top, bottom, fracRC.x);
setOutput(newValue);
}
2023-01-06 19:23:06 +01:00
`}};function lZ(r){let{inputs:e,backend:t,attrs:o}=r,{images:n}=e,{alignCorners:s,halfPixelCenters:a,size:i}=o,[p,u]=i,c=O().getBool("WEBGL_PACK_IMAGE_OPERATIONS")?new rg(n.shape,p,u,s,a):new tg(n.shape,p,u,s,a);return t.runWebGLProgram(c,[n],"float32")}var $D={kernelName:Mn,backendName:"webgl",kernelFunc:lZ};var og=class{constructor(e,t,o){this.variableNames=["dy"],this.outputShape=[],this.outputShape=t;let[,n,s]=t,[,a,i]=e,p=[o&&a>1?n-1:n,o&&i>1?s-1:s],u=[o&&a>1?a-1:a,o&&i>1?i-1:i],c=p[0]/u[0],l=p[1]/u[1],m=1/c,d=1/l,f=Math.ceil(m)*2+2,h=Math.ceil(d)*2+2;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
int r = coords[1];
int c = coords[2];
float accumulator = 0.0;
const float heightScale = float(${c});
const float widthScale = float(${l});
const float invHeightScale = float(${m});
2022-11-20 22:20:02 +01:00
const float invWidthScale = float(${d});
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
const int winHeight = int(${f});
2022-11-18 17:13:29 +01:00
const int winWidth = int(${h});
// Compute bounds for where in dy we will look
float startRLerp = floor(float(r) * invHeightScale);
int startDyR = int(startRLerp - float(winHeight / 2));
float startCLerp = floor(float(c) * invWidthScale);
int startDyC = int(startCLerp - float(winWidth / 2));
// Loop over dy
for (int dyROffset = 0; dyROffset < winHeight; dyROffset++) {
int dyR = dyROffset + startDyR;
// Guard against the window exceeding the bounds of dy
if (dyR < 0 || dyR >= ${a}) {
continue;
}
for (int dyCOffset = 0; dyCOffset < winWidth; dyCOffset++) {
int dyC = dyCOffset + startDyC;
// Guard against the window exceeding the bounds of dy
if (dyC < 0 || dyC >= ${i}) {
continue;
}
float dxR = float(dyR) * heightScale;
int topDxRIndex = int(floor(dxR));
int bottomDxRIndex = int(min(ceil(dxR), ${n-1}.0));
float dxRLerp = dxR - float(topDxRIndex);
float inverseDxRLerp = 1.0 - dxRLerp;
float dxC = float(dyC) * widthScale;
int leftDxCIndex = int(floor(dxC));
int rightDxCIndex = int(min(ceil(dxC), ${s-1}.0));
float dxCLerp = dxC - float(leftDxCIndex);
float inverseDxCLerp = 1.0 - dxCLerp;
if (r == topDxRIndex && c == leftDxCIndex) {
// topLeft
accumulator +=
getDy(b, dyR, dyC, d) * inverseDxRLerp * inverseDxCLerp;
}
if (r == topDxRIndex && c == rightDxCIndex) {
// topRight
accumulator += getDy(b, dyR, dyC, d) * inverseDxRLerp * dxCLerp;
}
if (r == bottomDxRIndex && c == leftDxCIndex) {
// bottomLeft
accumulator += getDy(b, dyR, dyC, d) * dxRLerp * inverseDxCLerp;
}
if (r == bottomDxRIndex && c == rightDxCIndex) {
// bottomRight
accumulator += getDy(b, dyR, dyC, d) * dxRLerp * dxCLerp;
}
}
}
// End loop over dy
setOutput(accumulator);
}
2023-01-06 19:23:06 +01:00
`}};function mZ(r){let{inputs:e,backend:t,attrs:o}=r,{images:n,dy:s}=e,{alignCorners:a}=o,i=new og(s.shape,n.shape,a);return t.runWebGLProgram(i,[s],s.dtype)}var ED={kernelName:Am,backendName:"webgl",kernelFunc:mZ};var ng=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m=n?"0.5":"0.0",d;s?d="max((vec2(yRC) + vec2(0.5)) * effectiveInputOverOutputRatioRC, vec2(0.0))":d="vec2(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
2022-11-18 17:13:29 +01:00
const vec2 effectiveInputOverOutputRatioRC = vec2(
${c[0]/l[0]},
${c[1]/l[1]});
const vec2 inputShapeRC = vec2(${i}.0, ${p}.0);
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
ivec2 yRC = coords.yz;
// Fractional source index.
2022-11-20 22:20:02 +01:00
vec2 sourceFracIndexRC = ${d};
2022-11-18 17:13:29 +01:00
// Compute the coordinators of nearest neighbor point.
ivec2 sourceNearestRC = ivec2(
min(inputShapeRC - 1.0, floor(sourceFracIndexRC + ${m})));
float newValue = getA(b, sourceNearestRC.x, sourceNearestRC.y, d);
setOutput(newValue);
}
2022-11-20 22:20:02 +01:00
`}};var sg=class{constructor(e,t,o,n,s){this.variableNames=["A"],this.packedInputs=!0,this.packedOutput=!0,this.outputShape=[];let[a,i,p,u]=e;this.outputShape=[a,t,o,u];let c=[n&&t>1?i-1:i,n&&o>1?p-1:p],l=[n&&t>1?t-1:t,n&&o>1?o-1:o],m=n?"0.5":"0.0",d;s?d="max((vec3(yRC) + vec3(0.5)) * effectiveInputOverOutputRatioRC, vec3(0.0))":d="vec3(yRC) * effectiveInputOverOutputRatioRC",this.userCode=`
2022-11-18 17:13:29 +01:00
const vec3 effectiveInputOverOutputRatioRC = vec3(
${c[0]/l[0]},
${c[1]/l[1]},
${c[1]/l[1]});
const vec3 inputShapeRC = vec3(${i}.0, ${p}.0,
${p}.0);
float getAValue(int b, int r, int c, int d) {
return getChannel(getA(b, r, c, d), vec2(c, d));
}
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
// Calculate values for next column in yRC.z.
ivec3 yRC = coords.yzz + ivec3(0, 0, 1);
// Fractional source index.
2022-11-20 22:20:02 +01:00
vec3 sourceFracIndexRC = ${d};
2022-11-18 17:13:29 +01:00
// Compute the coordinators of nearest neighbor point.
ivec3 sourceNearestRC = ivec3(
min(inputShapeRC - 1.0, floor(sourceFracIndexRC + ${m})));
// Should we calculate next column and row elements in 2x2 packed cell.
bool hasNextCol = d < ${u-1};
bool hasNextRow = coords.z < ${o-1};
vec4 newValue = vec4(
getAValue(b, sourceNearestRC.x, sourceNearestRC.y, d),
hasNextCol ? getAValue(b, sourceNearestRC.x, sourceNearestRC.y, d + 1)
: 0.0,
hasNextRow ? getAValue(b, sourceNearestRC.x, sourceNearestRC.z, d)
: 0.0,
(hasNextRow && hasNextCol) ?
getAValue(b, sourceNearestRC.x, sourceNearestRC.z, d + 1) : 0.0);
setOutput(newValue);
}
2023-01-06 19:23:06 +01:00
`}};function dZ(r){let{inputs:e,backend:t,attrs:o}=r,{images:n}=e,{alignCorners:s,halfPixelCenters:a,size:i}=o,[p,u]=i,c=O().getBool("WEBGL_PACK_IMAGE_OPERATIONS")?new sg(n.shape,p,u,s,a):new ng(n.shape,p,u,s,a);return t.runWebGLProgram(c,[n],n.dtype)}var AD={kernelName:Pn,backendName:"webgl",kernelFunc:dZ};var ag=class{constructor(e,t,o){this.variableNames=["dy"],this.outputShape=[],this.outputShape=t;let[,n,s]=t,[,a,i]=e,p=[o&&a>1?n-1:n,o&&i>1?s-1:s],u=[o&&a>1?a-1:a,o&&i>1?i-1:i],c=p[0]/u[0],l=p[1]/u[1],m=1/c,d=1/l,f=Math.ceil(m)*2+2,h=Math.ceil(d)*2+2;this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
ivec4 coords = getOutputCoords();
int b = coords[0];
int d = coords[3];
int r = coords[1];
int c = coords[2];
float accumulator = 0.0;
const float heightScale = float(${c});
const float widthScale = float(${l});
const float invHeightScale = float(${m});
2022-11-20 22:20:02 +01:00
const float invWidthScale = float(${d});
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
const int winHeight = int(${f});
2022-11-18 17:13:29 +01:00
const int winWidth = int(${h});
// Compute bounds for where in dy we will look
float startRLerp = floor(float(r) * invHeightScale);
int startDyR = int(floor(startRLerp - float(winHeight / 2)));
float startCLerp = floor(float(c) * invWidthScale);
int startDyC = int(floor(startCLerp - float(winWidth / 2)));
// Loop over dy
for (int dyROffset = 0; dyROffset < winHeight; dyROffset++) {
int dyR = dyROffset + startDyR;
// Guard against the window exceeding the bounds of dy
if (dyR < 0 || dyR >= ${a}) {
continue;
}
for (int dyCOffset = 0; dyCOffset < winWidth; dyCOffset++) {
int dyC = dyCOffset + startDyC;
// Guard against the window exceeding the bounds of dy
if (dyC < 0 || dyC >= ${i}) {
continue;
}
float sourceFracRow =
float(${p[0]}) *
(float(dyR) / float(${u[0]}));
float sourceFracCol =
float(${p[1]}) *
(float(dyC) / float(${u[1]}));
int sourceNearestRow = int(min(
float(int(${n}) - 1),
${o} ? float(round(sourceFracRow)) :
float(floor(sourceFracRow))));
int sourceNearestCol = int(min(
float(int(${s}) - 1),
${o} ? float(round(sourceFracCol)) :
float(floor(sourceFracCol))));
if (r == sourceNearestRow && c == sourceNearestCol) {
accumulator += getDy(b, dyR, dyC, d);
}
}
}
// End loop over dy
setOutput(accumulator);
}
2023-01-06 19:23:06 +01:00
`}};
/*
 * Kernel function registered in RD. Runs ag over dy, sized from dy.shape and
 * images.shape with attrs.alignCorners; the result keeps the dtype of dy.
 */
function fZ(r){let{inputs:e,backend:t,attrs:o}=r,{images:n,dy:s}=e,{alignCorners:a}=o,i=new ag(s.shape,n.shape,a);return t.runWebGLProgram(i,[s],s.dtype)}var RD={kernelName:Em,backendName:"webgl",kernelFunc:fZ};
/*
 * Unpacked "Reverse" shader program; hZ instantiates it as
 * new ig(x.shape, axes).
 * constructor(e, t): e is the tensor shape (also the outputShape), t the
 * list of axes to flip. Ranks above 4 throw. Rank 1 gets a dedicated shader
 * that reads getX(size - coord - 1); higher ranks are handled by the code
 * that follows the first template literal.
 */
var ig=class{constructor(e,t){this.variableNames=["x"];let o=e.length;if(o>4)throw new Error(`WebGL backend: Reverse of rank-${o} tensor is not yet supported`);if(this.outputShape=e,o===1){this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
int coord = getOutputCoords();
setOutput(getX(${e[0]} - coord - 1));
}
2023-01-06 19:23:06 +01:00
`;return}let n=i=>t.indexOf(i)!==-1&&e[i]!==1?`${e[i]} - coords[${i}] - 1`:`coords[${i}]`,s=e.map((i,p)=>n(p)).join(","),a=$e(o);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${a} coords = getOutputCoords();
setOutput(getX(${s}));
}
2023-01-06 19:23:06 +01:00
`}};
/*
 * Packed "Reverse" shader program (packedInputs and packedOutput are set);
 * hZ picks it when WEBGL_PACK_ARRAY_OPERATIONS is true.
 * constructor(e, t): e is the tensor shape (also the outputShape), t the
 * axes to flip. Ranks above 4 throw.
 * n holds the coordinate names from Et("rc", rank); s and a are the GLSL
 * bounds checks for "next element along the last axis" and "next element
 * along the second-to-last axis" that guard the g / b / a channels.
 */
var ug=class{constructor(e,t){this.variableNames=["x"],this.packedInputs=!0,this.packedOutput=!0;let o=e.length;if(o>4)throw new Error(`WebGL backend: Reverse of rank-${o} tensor is not yet supported`);this.outputShape=e;let n=Et("rc",o),s=`${n[o-1]} + 1 < ${this.outputShape[o-1]}`,a=`${n[o-2]} + 1 < ${this.outputShape[o-2]}`,i=$e(o);o===1?this.userCode=`
2022-11-18 17:13:29 +01:00
void main(){
int rc = getOutputCoords();
vec4 result = vec4(0.);
result.r = getChannel(getX(${e[0]} - rc - 1),
${e[0]} - rc - 1);
if(${s}){
result.g = getChannel(getX(${e[0]} - (rc + 1) - 1),
${e[0]} - (rc + 1) - 1);
}
setOutput(result);
}
`:this.userCode=`
void main() {
${i} rc = getOutputCoords();
vec4 result = vec4(0.);
result.r = ${p(n.slice())};
if(${s}){
result.g = ${u(n.slice())};
}
if(${a}) {
result.b = ${c(n.slice())};
if(${s}) {
result.a = ${l(n.slice())};
}
}
setOutput(result);
}
2023-01-06 19:23:06 +01:00
`;
/*
 * Helpers local to the ug constructor (hoisted function declarations).
 * p, u, c and l build the lookup expression for the r, g, b and a channel:
 * u bumps the last coordinate by one, c the second-to-last, l both. Each
 * receives a copy of the coordinate-name array and mutates it in place.
 * m joins the per-axis expressions into getChannel(getX(...), vec2(...)).
 * d mirrors a coordinate (size - coord - 1) only when that axis is listed in
 * t and its extent is not 1.
 */
function p(f){return m(f)}function u(f){return f[o-1]="("+f[o-1]+" + 1)",m(f)}function c(f){return f[o-2]="("+f[o-2]+" + 1)",m(f)}function l(f){return f[o-1]="("+f[o-1]+" + 1)",f[o-2]="("+f[o-2]+" + 1)",m(f)}function m(f){let h=e.map((b,C)=>d(C,f)),g=h.join(","),x=h.slice(-2).join(",");return`getChannel(getX(${g}), vec2(${x}))`}function d(f,h){return t.indexOf(f)!==-1&&e[f]!==1?`${e[f]} - ${h[f]} - 1`:`${h[f]}`}}};
/*
 * Kernel function registered in DD. attrs.dims is normalised with
 * y.parseAxisParam. A rank-0 input is returned through At unchanged in
 * value; otherwise ug (packed) or ig (unpacked) is run depending on the
 * WEBGL_PACK_ARRAY_OPERATIONS flag, keeping the dtype of x.
 */
function hZ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{dims:s}=o,a=n.shape.length,i=y.parseAxisParam(s,n.shape);if(a===0)return At({inputs:{x:n},backend:t});let p=O().getBool("WEBGL_PACK_ARRAY_OPERATIONS")?new ug(n.shape,i):new ig(n.shape,i);return t.runWebGLProgram(p,[n],n.dtype)}var DD={kernelName:Bn,backendName:"webgl",kernelFunc:hZ};
/*
 * Image-rotation shader program; the FD kernel instantiates it as
 * new pg(image.shape, fillValue).
 * constructor(e, t): outputShape is e; e[1] and e[2] are interpolated into
 * the shader as the bounds for coordY and coordX. The vec4 uniform "params"
 * is filled by the caller with [centerX, centerY, sin, cos].
 * A numeric t is emitted as a float literal with two decimals (toFixed(2));
 * otherwise t is joined into a vec3 that the shader indexes by channel.
 * NOTE(review): toFixed(2) rounds the fill value to two decimals - confirm
 * that this precision is intended.
 */
var pg=class{constructor(e,t){this.variableNames=["Image"],this.outputShape=[],this.customUniforms=[{name:"params",type:"vec4"}];let o=e[1],n=e[2];this.outputShape=e;let s="";typeof t=="number"?s=`float outputValue = ${t.toFixed(2)};`:s=`
2022-11-18 17:13:29 +01:00
vec3 fill = vec3(${t.join(",")});
float outputValue = fill[coords[3]];`,this.userCode=`
void main() {
ivec4 coords = getOutputCoords();
int x = coords[2];
int y = coords[1];
float coordXFloat = (float(x) - params[0]) * params[3] -
(float(y) - params[1]) * params[2];
float coordYFloat = (float(x) - params[0]) * params[2] +
(float(y) - params[1]) * params[3];
int coordX = int(round(coordXFloat + params[0]));
int coordY = int(round(coordYFloat + params[1]));
${s}
if(coordX >= 0 && coordX < ${n} && coordY >= 0 && coordY < ${o}) {
outputValue = getImage(coords[0], coordY, coordX, coords[3]);
}
setOutput(outputValue);
}
2023-01-06 19:23:06 +01:00
`}};var FD={kernelName:ts,backendName:"webgl",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{image:o}=r,{radians:n,fillValue:s,center:a}=e,i=t,p=new pg(o.shape,s),[u,c]=S.getImageCenter(a,o.shape[1],o.shape[2]),l=[[u,c,Math.sin(n),Math.cos(n)]];return i.runWebGLProgram(p,[o],o.dtype,l)}};var gZ=`
2022-11-18 17:13:29 +01:00
// OpenGL ES does not support round function.
// The algorithm is based on banker's rounding.
float base = floor(x);
if ((x - base) < 0.5) {
return floor(x);
} else if ((x - base) > 0.5) {
return ceil(x);
} else {
if (mod(base, 2.0) == 0.0) {
return base;
} else {
return base + 1.0;
}
}
2023-01-06 19:23:06 +01:00
`,xZ=he({opSnippet:gZ}),OD={kernelName:Vn,backendName:"webgl",kernelFunc:xZ};
/* Unary kernel built by he() from an inversesqrt snippet, with fE as its CPU implementation; registered in PD. */
var yZ="return inversesqrt(x);",bZ=he({opSnippet:yZ,cpuKernelImpl:fE}),PD={kernelName:zn,backendName:"webgl",kernelFunc:bZ};
/*
 * Scatter shader program reading "updates", "indices" and "defaultValue".
 * CZ instantiates it as new Sc(numUpdates, sliceRank, indicesRank,
 * updatesRank, strides, flattenedShape); zZ additionally passes a seventh
 * argument. The seventh parameter i (default true) is not read inside the
 * constructor.
 * constructor(e, t, o, n, s, a, i): e and t are loop bounds baked into the
 * shader; o and n choose how getIndices / getUpdates are indexed (rank 1 or
 * 2); s is interpolated as the strides constant; a is the outputShape.
 * f indexes strides[j] only when t > 1.
 * The shader sums every update whose flattened index equals coords[0] and
 * falls back to getDefaultValue() when none matched.
 */
var Sc=class{constructor(e,t,o,n,s,a,i=!0){this.variableNames=["updates","indices","defaultValue"],this.outputShape=a;let p=$e(s.length),u=$e(a.length),c="";o===1?c="i":o===2&&(c="i, j");let l=`getIndices(${c})`,m="";n===1?m="i":n===2&&(m="i, coords[1]");let d=`getUpdates(${m})`,f=t>1?"strides[j]":"strides";this.userCode=`
2022-11-18 17:13:29 +01:00
${p} strides = ${p}(${s});
void main() {
${u} coords = getOutputCoords();
float sum = 0.0;
bool found = false;
for (int i = 0; i < ${e}; i++) {
int flattenedIndex = 0;
for (int j = 0; j < ${t}; j++) {
int index = round(${l});
2022-11-20 22:20:02 +01:00
flattenedIndex += index * ${f};
2022-11-18 17:13:29 +01:00
}
if (flattenedIndex == coords[0]) {
2022-11-20 22:20:02 +01:00
sum += ${d};
2022-11-18 17:13:29 +01:00
found = true;
}
}
setOutput(mix(getDefaultValue(), sum, float(found)));
}
2023-01-06 19:23:06 +01:00
`}};
/*
 * Kernel function registered in MD. Shapes come from
 * S.calculateShapes(updates, indices, shape).
 * indices are reshaped to [numUpdates, sliceRank], updates to
 * [numUpdates, sliceSize]; a float32 scalar 0 serves as the default value.
 * The Sc program writes into [outputSize / sliceSize, sliceSize], which is
 * then reshaped to attrs.shape. All four intermediates are disposed.
 * NOTE(review): the outputSize === 0 early return creates the tensor with
 * the dtype of indices, while the normal path returns the dtype of updates -
 * confirm whether that difference is intended.
 */
function CZ(r){let{inputs:e,backend:t,attrs:o}=r,{indices:n,updates:s}=e,{shape:a}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=S.calculateShapes(s,n,a),m=[l/u,u];if(l===0)return t.makeTensorInfo(a,n.dtype);let d=re({inputs:{x:n},backend:t,attrs:{shape:[p,i]}}),f=re({inputs:{x:s},backend:t,attrs:{shape:[p,u]}}),h=t.makeTensorInfo([],"float32",new Float32Array([0])),g=new Sc(p,i,d.shape.length,f.shape.length,c,m),x=t.runWebGLProgram(g,[f,d,h],f.dtype),b=re({inputs:{x},backend:t,attrs:{shape:a}});return t.disposeIntermediateTensorInfo(d),t.disposeIntermediateTensorInfo(f),t.disposeIntermediateTensorInfo(x),t.disposeIntermediateTensorInfo(h),b}var MD={kernelName:Wn,backendName:"webgl",kernelFunc:CZ};
/*
 * Binary-search shader program over "sortedSequence" for each entry of
 * "values"; SZ instantiates it as new cg(sortedSequence.shape[0],
 * sortedSequence.shape[1], values.shape[1], side).
 * constructor(e, t, o, n): outputShape is [e, o]. The int uniform
 * "numInputs" is the initial right bound of the search.
 * When WEBGL_VERSION is 2 the search uses a while loop; otherwise a for loop
 * bounded by ceil(log2(t + 1)) iterations with an explicit break.
 * n === "left" compares with "<", any other value with "<=".
 */
var cg=class{constructor(e,t,o,n){this.variableNames=["sortedSequence","values"],this.customUniforms=[{name:"numInputs",type:"int"}],this.outputShape=[e,o];let s="while (left < right) {",a=`for (int i = 0; i < ${Math.ceil(Math.log2(t+1))}; ++i) { if (left >= right) break;`,i=O().getNumber("WEBGL_VERSION")===2?s:a,p=n==="left"?"<":"<=";this.userCode=`
2022-11-18 17:13:29 +01:00
int findBound(int batch, float value) {
int left = 0;
int right = numInputs;
int mid;
${i}
mid = (left + right) / 2;
if (getSortedSequence(batch, mid) ${p} value) {
left = mid + 1;
} else {
right = mid;
}
}
return right;
}
void main() {
ivec2 coords = getOutputCoords();
int batch = coords[0];
int valueIndex = coords[1];
float value = getValues(batch, valueIndex);
setOutput(float(findBound(batch, value)));
}
2023-01-06 19:23:06 +01:00
`}};
/*
 * Kernel function registered in LD. Runs cg with attrs.side and passes
 * sortedSequence.shape[1] as the numInputs uniform; the output dtype is
 * "int32".
 */
function SZ(r){let{inputs:e,backend:t,attrs:o}=r,{sortedSequence:n,values:s}=e,{side:a}=o,i=new cg(n.shape[0],n.shape[1],s.shape[1],a),p=[[n.shape[1]]];return t.runWebGLProgram(i,[n,s],"int32",p)}var LD={kernelName:fi,backendName:"webgl",kernelFunc:SZ};
/*
 * Element-select shader program over inputs "c", "a" and "b"; wZ
 * instantiates it as new lg(condition rank, t.shape, t rank).
 * constructor(e, t, o): outputShape is t; ranks above 4 throw ("Where for
 * rank ..."). For rank 1 both lookups use resRC directly. Otherwise getA and
 * getB receive one coordinate component per entry of t, while getC receives
 * only the first e components.
 * The shader outputs getA when the c value is >= 1.0 and getB otherwise.
 */
var lg=class{constructor(e,t,o){this.variableNames=["c","a","b"],this.outputShape=t;let n,s;if(o>4)throw Error(`Where for rank ${o} is not yet supported`);if(o===1)s="resRC",n="resRC";else{let i=["resRC.x","resRC.y","resRC.z","resRC.w"],p=[],u=[];for(let c=0;c<t.length;c++)u.push(`${i[c]}`),c<e&&p.push(`${i[c]}`);n=p.join(),s=u.join()}let a=$e(o);this.userCode=`
2022-11-18 17:13:29 +01:00
void main() {
${a} resRC = getOutputCoords();
float cVal = getC(${n});
if (cVal >= 1.0) {
setOutput(getA(${s}));
} else {
setOutput(getB(${s}));
}
}
2023-01-06 19:23:06 +01:00
`}};function wZ(r){let{inputs:e,backend:t}=r,{condition:o,t:n,e:s}=e,a=new lg(o.shape.length,n.shape,n.shape.length);return t.runWebGLProgram(a,[o,n,s],dt(n.dtype,s.dtype))}var BD={kernelName:$s,backendName:"webgl",kernelFunc:wZ};var IZ=`
2022-11-18 17:13:29 +01:00
// Stable and Attracting Fixed Point (0, 1) for Normalized Weights.
// see: https://arxiv.org/abs/1706.02515
2022-11-20 22:20:02 +01:00
float scaleAlpha = ${S.SELU_SCALEALPHA};
float scale = ${S.SELU_SCALE};
2022-11-18 17:13:29 +01:00
return (x >= 0.0) ? scale * x : scaleAlpha * (exp(x) - 1.0);
2023-01-06 19:23:06 +01:00
`,vZ=he({opSnippet:IZ}),VD={kernelName:Ca,backendName:"webgl",kernelFunc:vZ};var kZ=$o+`
2022-11-18 17:13:29 +01:00
return 1.0 / (1.0 + exp(-1.0 * x));
2023-01-06 19:23:06 +01:00
`,NZ=`
2022-11-18 17:13:29 +01:00
vec4 result = 1.0 / (1.0 + exp(-1.0 * x));
bvec4 isNaN = isnan(x);
result.r = isNaN.r ? x.r : result.r;
result.g = isNaN.g ? x.g : result.g;
result.b = isNaN.b ? x.b : result.b;
result.a = isNaN.a ? x.a : result.a;
return result;
2023-01-06 19:23:06 +01:00
`,TZ=he({opSnippet:kZ,packedOpSnippet:NZ,cpuKernelImpl:gE}),zD={kernelName:Gn,backendName:"webgl",kernelFunc:TZ};var _Z=`
2022-11-18 17:13:29 +01:00
if (isnan(x)) { return 0.0; }
return sign(x);
2023-01-06 19:23:06 +01:00
`,$Z=he({opSnippet:_Z}),WD={kernelName:wa,backendName:"webgl",kernelFunc:$Z};var EZ=$o+`
2022-11-18 17:13:29 +01:00
return sin(x);
2023-01-06 19:23:06 +01:00
`,AZ=he({opSnippet:EZ}),UD={kernelName:Un,backendName:"webgl",kernelFunc:AZ};var RZ=`
2022-11-18 17:13:29 +01:00
float e2x = exp(x);
return (e2x - 1.0 / e2x) / 2.0;
2023-01-06 19:23:06 +01:00
`,DZ=he({opSnippet:RZ}),GD={kernelName:Sa,backendName:"webgl",kernelFunc:DZ};var FZ=`
2022-11-18 17:13:29 +01:00
float epsilon = 1.1920928955078125e-7;
float threshold = log(epsilon) + 2.0;
bool too_large = x > -threshold;
bool too_small = x < threshold;
float result;
float exp_x = exp(x);
if (too_large){
result = x;
}
else if (too_small){
result = exp_x;
}
else{
result = log(exp_x + 1.0);
}
return result;
2023-01-06 19:23:06 +01:00
`,OZ=he({opSnippet:FZ}),HD={kernelName:Ia,backendName:"webgl",kernelFunc:OZ};
/*
 * Kernel function registered in KD ("spaceToBatchND" per the assertion
 * message); inputs of rank > 4 are rejected.
 * Steps: build the full paddings list ([0, 0] for the first dimension, then
 * attrs.paddings, then [0, 0] for every remaining dimension), pad with
 * constant 0 through Xw, reshape to S.getReshaped(...), transpose with the
 * permutation from S.getPermuted(...), and reshape to
 * S.getReshapedPermuted(...). The three intermediates are disposed.
 * i is the product of blockShape (reduce without an initial value).
 */
var PZ=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockShape:s,paddings:a}=o;y.assert(n.shape.length<=4,()=>"spaceToBatchND for rank > 4 with a WebGL backend not implemented yet");let i=s.reduce((x,b)=>x*b),p=[[0,0]];p.push(...a);for(let x=1+s.length;x<n.shape.length;++x)p.push([0,0]);let u=[],c=Xw({inputs:{x:n},backend:t,attrs:{paddings:p,constantValue:0}}),l=S.getReshaped(c.shape,s,i,!1),m=S.getPermuted(l.length,s.length,!1),d=S.getReshapedPermuted(c.shape,s,i,!1),f=re({inputs:{x:c},backend:t,attrs:{shape:l}}),h=yt({inputs:{x:f},backend:t,attrs:{perm:m}}),g=re({inputs:{x:h},backend:t,attrs:{shape:d}});return u.push(c),u.push(f),u.push(h),u.forEach(x=>t.disposeIntermediateTensorInfo(x)),g},KD={kernelName:As,backendName:"webgl",kernelFunc:PZ};
/*
 * Kernel function registered in qD (continues on the following lines).
 * Validates ranks first: denseShape and values must be vectors, indices a
 * matrix and defaultValue a scalar; each violation throws with the offending
 * shape in the message. The inputs are then read with readSync and handed to
 * yE; four tensors are returned.
 */
function MZ(r){let{inputs:e,backend:t}=r,{indices:o,values:n,denseShape:s,defaultValue:a}=e;if(s.shape.length!==1)throw new Error(`Dense shape must be a vector, saw:
2022-11-18 17:13:29 +01:00
${s.shape}`);if(o.shape.length!==2)throw new Error(`Indices must be a matrix, saw:
${o.shape}`);if(n.shape.length!==1)throw new Error(`Values must be a vector, saw:
${n.shape}`);if(a.shape.length!==0)throw new Error(`Default value must be a scalar, saw:
2023-01-06 19:23:06 +01:00
${a.shape}`);let i=t.readSync(o.dataId),p=t.readSync(n.dataId),u=t.readSync(s.dataId),c=t.readSync(a.dataId)[0],[l,m,d,f,h]=yE(i,o.shape,o.dtype,p,n.dtype,u,c);return[t.makeTensorInfo(m,o.dtype,l),t.makeTensorInfo([m[0]],n.dtype,d),t.makeTensorInfo([f.length],"bool",new Uint8Array(f.map(g=>Number(g)))),t.makeTensorInfo([h.length],o.dtype,new Int32Array(h))]}var qD={kernelName:hi,backendName:"webgl",kernelFunc:MZ};
/*
 * Kernel function registered in jD. Requires inputIndices to be a matrix and
 * inputShape / newShape to be vectors. The shapes are read into plain arrays
 * and passed with the indices to bE; returns [tensor of the new indices with
 * the dtype of inputIndices, 1-D tensor of the third bE result with the
 * dtype of newShape].
 */
function LZ(r){let{inputs:e,backend:t}=r,{inputIndices:o,inputShape:n,newShape:s}=e;if(o.shape.length!==2)throw new Error(`Input indices should be a matrix but received shape ${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape ${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=Array.from(t.readSync(n.dataId)),i=t.readSync(o.dataId),p=Array.from(t.readSync(s.dataId)),[u,c,l]=bE(i,o.shape,o.dtype,a,p);return[t.makeTensorInfo(c,o.dtype,u),t.makeTensorInfo([l.length],s.dtype,new Int32Array(l))]}var jD={kernelName:va,backendName:"webgl",kernelFunc:LZ};
/*
 * Kernel function registered in XD (continues on the following lines).
 * Requires data of rank >= 1 and vector-shaped indices and segmentIds, then
 * reads all three with readSync and calls jf with a trailing true flag.
 * NOTE(review): VZ below is identical except that it omits that flag -
 * presumably the flag selects a mean instead of a sum; confirm against jf.
 */
function BZ(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
2023-01-06 19:23:06 +01:00
${s.shape}`);let a=t.readSync(o.dataId),i=t.readSync(n.dataId),p=t.readSync(s.dataId),[u,c]=jf(a,o.shape,o.dtype,i,p,!0);return t.makeTensorInfo(c,o.dtype,u)}var XD={kernelName:gi,backendName:"webgl",kernelFunc:BZ};
/*
 * Kernel function registered in YD (continues on the following lines).
 * Same validation and host-side flow as BZ, but jf is called without the
 * trailing boolean argument. The result keeps the dtype of data.
 */
function VZ(r){let{inputs:e,backend:t}=r,{data:o,indices:n,segmentIds:s}=e;if(o.shape.length<1)throw new Error("Data should be at least 1 dimensional but received scalar");if(n.shape.length!==1)throw new Error(`Indices should be a vector but received shape
2022-11-18 17:13:29 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Segment ids should be a vector but received shape
2023-01-06 19:23:06 +01:00
${s.shape}`);let a=t.readSync(o.dataId),i=t.readSync(n.dataId),p=t.readSync(s.dataId),[u,c]=jf(a,o.shape,o.dtype,i,p);return t.makeTensorInfo(c,o.dtype,u)}var YD={kernelName:xi,backendName:"webgl",kernelFunc:VZ};
/*
 * Kernel function registered in QD. Shapes come from
 * S.calculateShapes(sparseValues, sparseIndices, outputShape).
 * String values take a host path: buffers are read with bufferSync, the
 * default value is decoded with y.decodeString and hE builds the result.
 * Other dtypes run the Sc scatter program with output shape
 * [outputSize, 1] and defaultValue as its third input, then reshape to
 * attrs.outputShape. d is a constant false passed to both paths.
 */
function zZ(r){let{inputs:e,backend:t,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=e,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=S.calculateShapes(s,n,i),d=!1;if(s.dtype==="string"){let x=t.bufferSync(n),b=t.bufferSync(s),C=y.decodeString(t.readSync(a.dataId)[0]),w=hE(x,b,i,m,c,u,p,l,C,d);return t.makeTensorInfo(i,w.dtype,w.values)}let f=new Sc(u,p,n.shape.length,s.shape.length,l,[m,1],d),h=t.runWebGLProgram(f,[s,n,a],s.dtype),g=re({inputs:{x:h},backend:t,attrs:{shape:i}});return t.disposeIntermediateTensorInfo(h),g}var QD={kernelName:yi,backendName:"webgl",kernelFunc:zZ};
/*
 * Kernel function registered in ZD. Resolves the split axis with
 * y.parseAxisParam and the piece sizes with S.prepareSplitSize, then returns
 * one ls() slice per size. The begin offset along the split axis is advanced
 * by each size after its slice is taken; the same begin array is mutated and
 * reused across iterations.
 */
function WZ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=o,i=y.parseAxisParam(a,n.shape)[0],p=S.prepareSplitSize(n,s,i),u=n.shape.length,c=new Array(u).fill(0),l=n.shape.slice();return p.map(m=>{let d=[...l];d[i]=m;let f=ls({inputs:{x:n},backend:t,attrs:{begin:c,size:d}});return c[i]+=m,f})}var ZD={kernelName:Rs,backendName:"webgl",kernelFunc:WZ};
/* Unary sqrt kernel: the same snippet serves the packed and unpacked paths; CE is the CPU implementation. */
var JD="return sqrt(x);",UZ=he({opSnippet:JD,packedOpSnippet:JD,cpuKernelImpl:CE}),eF={kernelName:Hn,backendName:"webgl",kernelFunc:UZ};
/* Unary kernel that squares its input. */
var GZ="return x * x;",HZ=he({opSnippet:GZ}),tF={kernelName:bi,backendName:"webgl",kernelFunc:HZ};
/* Binary kernel built by tt(): squared difference of a and b, same snippet for packed and unpacked. */
var rF="return (a - b) * (a - b);",KZ=tt({opSnippet:rF,packedOpSnippet:rF}),oF={kernelName:jn,backendName:"webgl",kernelFunc:KZ};
/*
 * Kernel function registered in nF (continues on the following lines).
 * Builds a unary snippet, prefixed with zt, that returns 1.0 for x > 0.0 and
 * float(attrs.alpha) otherwise, and runs it through program Jt.
 */
function qZ({inputs:r,attrs:e,backend:t}){let{x:o}=r,n=zt+`
2022-11-18 17:13:29 +01:00
return x > 0.0 ? 1.0 : float(${e.alpha});
2023-01-06 19:23:06 +01:00
`,s=new Jt(o.shape,n);return t.runWebGLProgram(s,[o],o.dtype)}var nF={kernelName:fo,backendName:"webgl",kernelFunc:qZ};
/*
 * Strided-slice shader program; jZ instantiates it as
 * new mg(begin, strides, finalShapeSparse).
 * constructor(e, t, o): o is the outputShape; e and t are interpolated into
 * the shader as the begin and strides constants.
 * The source coordinate is coords * strides + begin, written per component
 * for ranks above 1. s and a hold the same coordinate type, $e(o.length).
 * The o.length === 1 arm inside the map callback cannot be taken, because
 * the callback only runs on the n !== 1 path.
 */
var mg=class{constructor(e,t,o){this.variableNames=["x"],this.outputShape=o;let n=o.length,s=$e(o.length),a=$e(o.length),i="";if(n===1)i="coords * strides + begin";else{let p=0;i=o.map((u,c)=>(p++,o.length===1?`coords * strides[${c}] + begin[${c}]`:`coords[${p-1}] * strides[${c}] + begin[${c}]`)).join(",")}this.userCode=`
// Start offsets and step sizes, baked in from the constructor arguments.
${s} begin = ${s}(${e});
${s} strides = ${s}(${t});

// Each output element reads the input at coords * strides + begin
// (the per-axis expression is generated into the getX argument list).
void main() {
  ${a} coords = getOutputCoords();
  setOutput(getX(${i}));
}
`}};function jZ(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:d,finalShape:f,isIdentity:h,sliceDim0:g,isSimpleSlice:x,begin:b,end:C,strides:w}=ut.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=re({inputs:{x:n},backend:t,attrs:{shape:f}});else if(g||x){y.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let E=ut.computeOutShape(b,C,w),A=ls({inputs:{x:n},backend:t,attrs:{begin:b,size:E}});k=re({inputs:{x:A},backend:t,attrs:{shape:f}}),t.disposeIntermediateTensorInfo(A)}else if(t.shouldExecuteOnCPU([n])){let A=t.readSync(n.dataId),R=le(n.shape,n.dtype,A),D=SE(d,R,w,b);k=t.makeTensorInfo(f,n.dtype,D.values)}else{let A=new mg(b,w,d);k=t.runWebGLProgram(A,[n],n.dtype)}let _=re({inputs:{x:k},backend:t,attrs:{shape:f}});return t.disposeIntermediateTensorInfo(k),_}var sF={kernelName:Xn,backendName:"webgl",kernelFunc:jZ};function XZ(r){let{inputs:e,backend:t,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=e,m=t.readSync(c.dataId),d=t.readSync(l.dataId),[f,h]=wE(m,d,n,s,a,i,p,u);return[t.makeTensorInfo([f.length],"string",f),t.makeTensorInfo(l.shape,"int32",h)]}var aF={kernelName:Ds,backendName:"webgl",kernelFunc:XZ};function YZ(r){let{inputs:e,backend:t,attrs:o}=r,{skipEmpty:n}=o,{input:s,delimiter:a}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(s.shape.length!==1)throw new Error(`Input must be a vector, got shape: ${s.shape}`);if(a.shape.length!==0)throw new Error(`Delimiter must be a scalar, got shape: ${a.shape}`);let i=t.readSync(s.dataId),p=t.readSync(a.dataId)[0],[u,c,l]=IE(i,p,n),m=c.length;return[t.makeTensorInfo([m,2],"int32",u),t.makeTensorInfo([m],"string",c),t.makeTensorInfo([2],"int32",new Int32Array(l))]}var iF={kernelName:Ci,backendName:"webgl",kernelFunc:YZ};function 
QZ(r){let{inputs:e,backend:t,attrs:o}=r,{numBuckets:n}=o,{input:s}=e;if(s.dtype!=="string")throw new Error("Input must be of datatype string");if(n<=0)throw new Error("Number of buckets must be at least 1");let a=t.readSync(s.dataId),i=vE(a,n);return t.makeTensorInfo(s.shape,"int32",i)}var uF={kernelName:Si,backendName:"webgl",kernelFunc:QZ};var ZZ="return tan(x);",JZ=he({opSnippet:ZZ}),pF={kernelName:Qn,backendName:"webgl",kernelFunc:JZ};var e9=`
// The argument of exp is never positive, so e2x cannot overflow; the sign
// of x is re-applied afterwards.
float e2x = exp(-2.0 * abs(x));
return sign(x) * (1.0 - e2x) / (1.0 + e2x);
`,t9=he({opSnippet:e9}),cF={kernelName:Zn,backendName:"webgl",kernelFunc:t9};var dg=class{constructor(e,t){this.variableNames=["A"];let o=new Array(e.length);for(let a=0;a<o.length;a++)o[a]=e[a]*t[a];this.outputShape=o,this.rank=o.length;let n=$e(this.rank),s=r9(e);this.userCode=`
// Each output element copies the input element whose coordinates are the
// output coordinates wrapped (imod) by the input extent of every axis.
void main() {
  ${n} resRC = getOutputCoords();
  setOutput(getA(${s}));
}
`}};function r9(r){let e=r.length;if(e>5)throw Error(`Tile for rank ${e} is not yet supported`);if(e===1)return`imod(resRC, ${r[0]})`;let t=["resRC.x","resRC.y","resRC.z","resRC.w","resRC.u"],o=[];for(let n=0;n<r.length;n++)o.push(`imod(${t[n]}, ${r[n]})`);return o.join()}function Qw(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{reps:s}=o;if(n.dtype==="string"||n.shape.length>5){let p=t.readSync(n.dataId),u=n.dtype==="string"?p.map(m=>y.decodeString(m)):p,c=le(n.shape,n.dtype,u),l=NE(c,s);return t.makeTensorInfo(l.shape,l.dtype,l.values)}let a=new dg(n.shape,s);return t.runWebGLProgram(a,[n],n.dtype)}var lF={kernelName:to,backendName:"webgl",kernelFunc:Qw};var fg=class{constructor(e){this.variableNames=["x","indices"],this.customUniforms=[{name:"n",type:"int"},{name:"firstPass",type:"int"},{name:"negativeInf",type:"float"},{name:"dir",type:"int"},{name:"inc",type:"int"}],this.outputShape=e,this.userCode=`
// One compare-exchange pass: every output slot receives the index that
// belongs at its position after comparing the pair (i, i + inc).
// Uniforms: n (valid length of the last axis), firstPass (1 when no index
// texture exists yet), negativeInf (value used for positions >= n),
// dir (size of a same-direction run), inc (distance between paired slots).
void main() {
  ivec2 coords = getOutputCoords();
  int batch = coords[0];
  int elemIdx = coords[1];

  // We compare elements pair-wise within a group of size 2 * inc.
  // The comparing rule for each group alternates between ascending
  // and descending. Within each group, we compare each pair at
  // positions i and i+inc. To decide whether an element at position i
  // is x0 or x1, we mod it by 2 * inc, if the result is smaller than
  // inc, it is in the first half of the group, we denote it as x0,
  // otherwise we denote it as x1.
  // For example, as shown in the Bitonic top K paper referenced above,
  // Figure5(a) shows that element[1] is in the
  // second half of the group when group size is 2, but it is in the
  // first half of the group when group size is 4.
  bool isFirstInPair = imod(elemIdx, 2 * inc) < inc;
  int i = isFirstInPair ? elemIdx : elemIdx - inc;

  // On the first pass positions are their own indices; afterwards the
  // indices come from the previous pass' output.
  int i0 = firstPass == 1 ? i : int(getIndices(batch, i));
  int i1 = firstPass == 1 ? i + inc : int(getIndices(batch, i + inc));
  // Indices at or beyond n are padding and compare as negativeInf.
  float x0 = i0 < n ? getX(batch, i0) : negativeInf;
  float x1 = i1 < n ? getX(batch, i1) : negativeInf;

  // Denotes which direction indices are in (ascending or descending).
  bool reverse = imod(elemIdx, 2 * dir) >= dir;
  // Ties are broken by index so the ordering is deterministic.
  bool isGreater = x0 > x1 || (x0 == x1 && i1 > i0);

  if (reverse == isGreater) { // Elements in opposite order of direction
    int iTemp = i0;
    i0 = i1;
    i1 = iTemp;
  }
  if (isFirstInPair) {
    setOutput(float(i0));
  } else {
    setOutput(float(i1));
  }
}
`}},hg=class{constructor(e){this.variableNames=["x","indices"],this.customUniforms=[{name:"n",type:"int"},{name:"firstPass",type:"int"},{name:"k",type:"int"}],this.outputShape=e,this.userCode=`
// Merge pass: halves the sequence length by keeping, for every pair of
// positions k apart, the index of the larger value.
// Uniforms: n (valid length of the last axis), firstPass (1 when no index
// texture exists yet), k (run length being merged).
void main() {
  // Takes max of indices (0, k), (1, k + 1), (2, k + 2) ...
  ivec2 coords = getOutputCoords();
  int batch = coords[0];
  int elemIdx = coords[1];

  // The output size is half of the previous size.
  // If the previous sequence is | | | | _ _ _ _ | | | | _ _ _ _ (k=4),
  // we only need to output the indices at positions |, the indices at
  // positions _ can be thrown away, see Figure5(b) After Phase 2
  // (Merge phase) in the Bitonic Top K paper referenced above.
  // For example, the paper shows we only need to output the orange bars.
  // The output sequence should look like this | | | | | | | |.
  // Because the sequence is halved, to map the output index back
  // to the previous sequence to find the corresponding value,
  // we need to double the index. When we double the index,
  // we basically interpolate a position, so 2i looks like
  // | _ | _ | _ | _ | _ | _ | _. We move the | to the first k position
  // of each 2k positions by - elemIdx % k. E.g. for output at
  // index 4,5,6,7, we want to get the corresponding element at
  // original index 8,9,10,11, for output at index 8,9,10,11,
  // we want to get the corresponding element at original index
  // 16,17,18,19, so on and so forth.
  int i = elemIdx < k ? elemIdx : (elemIdx * 2 - imod(elemIdx, k));
  int i0 = firstPass == 1 ? i : int(getIndices(batch, i));
  int i1 = firstPass == 1 ? i + k : int(getIndices(batch, i + k));

  float x0 = getX(batch, i0);
  // A partner index at or beyond n falls back to x0, so i0 wins the comparison.
  float x1 = i1 < n ? getX(batch, i1) : x0;

  setOutput(x0 >= x1 ? float(i0) : float(i1));
}
`}};function Gu(r,e){e!==null&&r.disposeIntermediateTensorInfo(e)}function mF(r){let e=1;for(;e<r;)e*=2;return e}function o9(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{k:s,sorted:a}=o,i=O().getNumber("TOPK_LAST_DIM_CPU_HANDOFF_SIZE_THRESHOLD"),p=O().getNumber("TOPK_K_CPU_HANDOFF_THRESHOLD"),u=n.shape,c=u[u.length-1];if(t.shouldExecuteOnCPU([n])||c<i||s>p){let D=t.readSync(n.dataId),[P,M]=TE(D,u,n.dtype,s,a);return[t.makeTensorInfo(P.shape,P.dtype,P.values),t.makeTensorInfo(M.shape,M.dtype,M.values)]}if(s===0)return u[u.length-1]=0,[t.makeTensorInfo(u,n.dtype,[]),t.makeTensorInfo(u,"int32",[])];if(c===1)return[n,qa({attrs:{shape:u,dtype:"int32",value:0},backend:t})];let l=t.texData.get(n.dataId),m=l!==null&&l.isPacked,d=m?t.unpackTensor(n):n,h=y.sizeFromShape(u)/c,g=re({inputs:{x:d},attrs:{shape:[h,c]},backend:t});m&&Gu(t,d);let x=mF(s),b=mF(c),C=null,w=()=>C===null?[g,g]:[g,C],k=(D,P,M)=>{let L=w(),V=new fg(M),U=[[c],[C===null?1:0],[Number.NEGATIVE_INFINITY],[D],[P]],K=C;C=t.runWebGLProgram(V,L,"int32",U),Gu(t,K)};for(let D=1;D<x;D*=2){let P=D*2;for(let M=D;M>=1;M/=2)k(P,M,[h,b])}for(let D=b;D>x;D/=2){let P=w(),M=new hg([h,D/2]),V=[[c],[C===null?1:0],[x]],z=C;C=t.runWebGLProgram(M,P,"int32",V),Gu(t,z);let U=x/2,K=U*2;for(let H=U;H>=1;H/=2)k(K,H,C.shape)}let _=C;C=ls({inputs:{x:C},backend:t,attrs:{begin:0,size:[h,s]}}),Gu(t,_);let E=Gw({inputs:{x:g,indices:C},backend:t,attrs:{axis:1,batchDims:1}});Gu(t,g);let A=u.slice(0,-1);A.push(s),_=C,C=re({inputs:{x:C},attrs:{shape:A},backend:t}),Gu(t,_);let R=E;return E=re({inputs:{x:E},attrs:{shape:A},backend:t}),Gu(t,R),[E,C]}var dF={kernelName:Jn,backendName:"webgl",kernelFunc:o9};var gg=class{constructor(e,t,o,n,s,a){this.variableNames=["Image","Transforms"],this.outputShape=a;let i=o==="nearest"?1:2,p;switch(n){case"constant":p=1;break;case"reflect":p=2;break;case"wrap":p=3;break;case"nearest":p=4;break;default:p=1;break}this.userCode=`
// Maps a source coordinate according to the fill-mode id compiled into the
// shader: 2 = reflect, 3 = wrap, 4 = nearest; any other id (constant)
// returns the coordinate unchanged so the caller can apply the fill value.
// outCoord: coordinate to map; len: extent of the axis as a float.
float mapCoord(float outCoord, float len) {
  float inCoord = outCoord;
  if(${p} == 2) {
    // Reflect: fold the coordinate back into range with period 2 * len.
    if (inCoord < 0.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz2 = 2.0 * len;
        if (inCoord < sz2) {
          inCoord = sz2 * float(int(float(-inCoord / sz2))) +
          inCoord;
        }
        inCoord = inCoord < -len ? inCoord + sz2 : -inCoord - 1.0;
      }
    } else if (inCoord > len - 1.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz2 = 2.0 * len;
        inCoord -= sz2 * float(int(float(inCoord / sz2)));
        if (inCoord >= len) {
          inCoord = sz2 - inCoord - 1.0;
        }
      }
    }
    return clamp(inCoord, 0.0, len - 1.0);
  } else if (${p} == 3) {
    // Wrap: shift the coordinate by whole multiples of len.
    if (inCoord < 0.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz = len - 1.0;
        inCoord += len * (float(int(float(-inCoord / sz))) + 1.0);
      }
    } else if (inCoord > len - 1.0) {
      if (len <= 1.0) {
        inCoord = 0.0;
      } else {
        float sz = len - 1.0;
        inCoord -= len * float(int(float(inCoord / sz)));
      }
    }
    return clamp(inCoord, 0.0, len - 1.0);
  } else if (${p} == 4) {
    // Nearest: clamp to the closest valid coordinate.
    return clamp(outCoord, 0.0, len - 1.0);
  } else {
    return outCoord;
  }
}
// Fetches one input texel, substituting the fill value for any coordinate
// that lies outside the source image.
float readWithFillValue(int batch, int coordY, int coordX,
                        int channel) {
  bool rowInside = 0 <= coordY && coordY < ${e};
  bool colInside = 0 <= coordX && coordX < ${t};
  if (!(rowInside && colInside)) {
    return float(${s});
  }
  return getImage(batch, coordY, coordX, channel);
}
// Projective image transform: maps every output pixel back into the source
// image with the eight per-batch coefficients and samples it with either
// nearest (interpolation id 1) or bilinear interpolation.
void main() {
  ivec4 coords = getOutputCoords();
  float outputValue;
  int batch = coords[0];
  int x = coords[2];
  int y = coords[1];
  int channel = coords[3];
  float xf = float(x);
  float yf = float(y);
  // Row-major coefficients [a1 a2 a3; b1 b2 b3; c1 c2 1].
  float a1 = getTransforms(batch, 0);
  float a2 = getTransforms(batch, 1);
  float a3 = getTransforms(batch, 2);
  float b1 = getTransforms(batch, 3);
  float b2 = getTransforms(batch, 4);
  float b3 = getTransforms(batch, 5);
  float c1 = getTransforms(batch, 6);
  float c2 = getTransforms(batch, 7);
  float projection = c1 * xf + c2 * yf + 1.0;
  if (projection == 0.0) {
    // The source position is undefined; emit the fill value.
    outputValue = float(${s});
  } else {
    float inX = (a1 * xf + a2 * yf + a3) / projection;
    float inY = (b1 * xf + b2 * yf + b3) / projection;
    float mapX = mapCoord(inX, float(${t}));
    float mapY = mapCoord(inY, float(${e}));

    if (${i} == 1) {
      int coordY = int(round(mapY));
      int coordX = int(round(mapX));
      outputValue = readWithFillValue(batch, coordY, coordX,
        channel);
    } else {
      // Bilinear blend of the four texels surrounding (mapX, mapY).
      float yFloor = floor(mapY);
      float xFloor = floor(mapX);
      float yCeil = yFloor + 1.0;
      float xCeil = xFloor + 1.0;
      float valueYFloor = (xCeil - mapX) *
      readWithFillValue(batch, int(yFloor), int(xFloor), channel) +
      (mapX - xFloor) *
      readWithFillValue(batch, int(yFloor), int(xCeil), channel);
      float valueYCeil = (xCeil - mapX) *
      readWithFillValue(batch, int(yCeil), int(xFloor), channel) +
      (mapX - xFloor) *
      readWithFillValue(batch, int(yCeil), int(xCeil), channel);
      outputValue = (yCeil - mapY) * valueYFloor +
      (mapY - yFloor) * valueYCeil;
    }
  }
  setOutput(outputValue);
}
`}};function n9(r){let{inputs:e,backend:t,attrs:o}=r,{image:n,transforms:s}=e,{interpolation:a,fillMode:i,fillValue:p,outputShape:u}=o,[c,l,m,d]=n.shape,[f,h]=u!=null?u:[l,m],g=[c,f,h,d],x=new gg(l,m,a,i,p,g);return t.runWebGLProgram(x,[n,s],"float32")}var fF={kernelName:es,backendName:"webgl",kernelFunc:n9};function s9(r){let{inputs:e,attrs:t,backend:o}=r,{axis:n}=t,{x:s}=e;us(s,"unique"),console.warn("WARNING: ","UI might be locked temporarily as data is being downloaded");let a=o.readSync(s.dataId),{outputValues:i,outputShape:p,indices:u}=_E(a,n,s.shape,s.dtype);return[o.makeTensorInfo(p,s.dtype,i),o.makeTensorInfo([u.length],"int32",u)]}var hF={kernelName:Np,backendName:"webgl",kernelFunc:s9};function a9(r){let{inputs:e,backend:t,attrs:o}=r,{value:n}=e,{axis:s}=o;s<0&&(s+=n.shape.length);let a=n,i=a.shape.length,p=n.shape[s],u=new Array(i-1),c=0;for(let h=0;h<i;h++)h!==s&&(u[c++]=a.shape[h]);let l=[],m=new Array(i).fill(0),d=a.shape.slice();d[s]=1;let f=new Array(p);for(let h=0;h<f.length;h++){m[s]=h;let g=ls({inputs:{x:a},backend:t,attrs:{begin:m,size:d}}),x=re({inputs:{x:g},backend:t,attrs:{shape:u}});f[h]=x,l.push(g)}return l.forEach(h=>t.disposeIntermediateTensorInfo(h)),f}var gF={kernelName:Fs,backendName:"webgl",kernelFunc:a9};var xg=class{constructor(e,t){this.variableNames=["x","segmentIds"];let o=e.windowSize,n=e.batchSize,s=e.inSize,a=e.numSegments,i=a*Math.ceil(s/o);this.outputShape=[n,i];let p="0.0",u="sumValue",c=Math.floor(o/4)*4,l=o%4,m=`
// segFilter is 1 for lanes in the current segment and 0 otherwise, so the
// dot product adds only the matching values.
sumValue += dot(values, segFilter);
`,d="";s%o>0&&(d=`
// Positions outside the input contribute the neutral element.
if (inIdx < 0 || inIdx >= ${s}) {
  return initializationValue;
}
`);let f="";s%o>0&&(f=`
// Positions outside the input get segment id -1.0.
if (inIdx < 0 || inIdx >= ${s}) {
  return -1.0;
}
`),this.userCode=`
// Neutral element substituted for unused vec4 lanes and out-of-range reads.
const float initializationValue = ${p};

// Reads x[batch, inIdx]; the optional guard (emitted only when the input
// size is not a multiple of the window size) handles out-of-range indices.
float getValue(int batch, int inIdx) {
  ${d}
  return getX(batch, inIdx);
}
// Reads the segment id stored at inIdx; the optional guard (emitted only
// when the input size is not a multiple of the window size) handles
// out-of-range indices.
float getSegmentIdAtIndex(int inIdx) {
  ${f}
  return getSegmentIds(inIdx);
}
// For output column outIdx, sums the values of one input window whose
// segment id equals currentSeg. The window is processed four elements at a
// time, followed by a 1-3 element remainder chosen at shader build time.
void main() {
  ivec2 coords = getOutputCoords();
  int batch = coords[0];
  int outIdx = coords[1];
  int inOffset = int(floor(float(outIdx) / float(
    ${a})) * float(${o}));
  int currentSeg = int(mod(float(outIdx), float(${a})));

  float sumValue = 0.0;

  // Full groups of four.
  for (int i = 0; i < ${c}; i += 4) {
    int inIdx = inOffset + i;
    vec4 values = vec4(
      getValue(batch, inIdx),
      getValue(batch, inIdx + 1),
      getValue(batch, inIdx + 2),
      getValue(batch, inIdx + 3)
    );

    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 2)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 3)) == currentSeg ? 1 : 0
    );

    ${m}
  }

  // Remainder: unused lanes carry the neutral value and a zero filter.
  int inIdx = inOffset + ${c};
  if (${l===1}) {
    vec4 values = vec4(
      getValue(batch, inIdx),
      initializationValue,
      initializationValue,
      initializationValue
    );

    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      0,
      0,
      0
    );

    ${m}
  } else if (${l===2}) {
    vec4 values = vec4(
      getValue(batch, inIdx),
      getValue(batch, inIdx + 1),
      initializationValue,
      initializationValue
    );

    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
      0,
      0
    );

    ${m}
  } else if (${l===3}) {
    vec4 values = vec4(
      getValue(batch, inIdx),
      getValue(batch, inIdx + 1),
      getValue(batch, inIdx + 2),
      initializationValue
    );

    vec4 segFilter = vec4(
      int(getSegmentIdAtIndex(inIdx)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 1)) == currentSeg ? 1 : 0,
      int(getSegmentIdAtIndex(inIdx + 2)) == currentSeg ? 1 : 0,
      0
    );

    ${m}
  }
  setOutput(${u});
}
`}};function i9(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,segmentIds:s}=e,{numSegments:a}=o,i=n.shape.length,p=[],u=0,c=S.getAxesPermutation([u],i),l=n;c!=null&&(l=yt({inputs:{x:n},backend:t,attrs:{perm:c}}),p.push(l),u=S.getInnerMostAxes(1,i)[0]);let m=S.segment_util.computeOutShape(l.shape,u,a),d=y.sizeFromShape([l.shape[u]]),f=re({inputs:{x:l},backend:t,attrs:{shape:[-1,d]}});p.push(f);let h=Ta(n.dtype),g=(w,k,_,E,A)=>{let R=w.shape[0],D=w.shape[1],P=S.segment_util.segOpComputeOptimalWindowSize(D,A),M={windowSize:P,inSize:D,batchSize:R,numSegments:A},L=new xg(M,k),V=t.compileAndRun(L,[w,_],E);if(p.push(V),V.shape[1]===A)return V;let z=Yw({backend:t,attrs:{start:0,stop:A,step:1,dtype:"float32"}}),U=Qw({inputs:{x:z},backend:t,attrs:{reps:[D/P]}});return p.push(z),p.push(U),g(V,k,U,E,A)},x=g(f,"unsortedSegmentSum",s,h,a),b=re({inputs:{x},backend:t,attrs:{shape:m}}),C=b;if(c!=null){p.push(b);let w=S.getUndoAxesPermutation(c);C=yt({inputs:{x:C},backend:t,attrs:{perm:w}})}return p.forEach(w=>t.disposeIntermediateTensorInfo(w)),C}var xF={kernelName:Tp,backendName:"webgl",kernelFunc:i9};var u9=[rA,nA,sA,aA,uA,pA,cA,lA,fA,hA,gA,xA,yA,bA,CA,SA,wA,IA,vA,kA,NA,_A,$A,EA,FA,PA,MA,KE,BA,zA,WA,UA,GA,HA,KA,qA,jA,XA,YA,JA,eR,tR,rR,oR,nR,sR,aR,iR,uR,pR,cR,lR,mR,dR,fR,gR,xR,yR,bR,SR,wR,IR,vR,kR,NR,TR,_R,$R,HE,ER,VA,AR,RR,DR,qE,FR,OR,PR,MR,LR,BR,VR,zR,WR,UR,HR,KR,qR,jR,XR,YR,ZR,eD,tD,rD,oD,nD,pD,YE,cD,lD,mD,dD,AA,fD,xD,yD,bD,CD,jE,SD,wD,ID,vD,kD,RA,sD,ND,TD,_D,ZE,$D,ED,AD,RD,DD,FD,OD,PD,MD,LD,BD,VD,zD,WD,UD,GD,TA,uD,HD,KD,qD,jD,XD,YD,QD,ZD,eF,tF,oF,nF,sF,aF,iF,uF,iD,eA,pF,cF,lF,dF,fF,tA,hF,gF,xF,hD];for(let r of u9)ka(r);var Fe;(function(r){r[r.float32=0]="float32",r[r.int32=1]="int32",r[r.bool=2]="bool",r[r.string=3]="string",r[r.complex64=4]="complex64"})(Fe||(Fe={}));var Ui;(function(r){r[r.linear=0]="linear",r[r.relu=1]="relu",r[r.relu6=2]="relu6",r[r.prelu=3]="prelu",r[r.leakyrelu=4]="leakyrelu",r[r.sigmoid=5]="sigmoid",r[r.elu=6]="elu"})(Ui||(Ui={}));var yF;function 
p9(r){yF=r.wasm.cwrap(ho,null,["number","array","number","number","array","number","number","number","number","number","number","number","number"])}function c9(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=e;if(n.dtype!=="float32"||s.dtype!=="float32")throw new Error("_FusedMatMul for non non-float32 tensors not yet supported.");let{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o,m=t.dataIdMap.get(n.dataId).id,d=t.dataIdMap.get(s.dataId).id,f=0;if(a!=null){let A=t.dataIdMap.get(a.dataId);if(A.shape.length!==1)throw new Error(`_FusedMatMul only supports rank-1 bias but got rank ${A.shape.length}.`);f=A.id}let h=i==null?0:t.dataIdMap.get(i.dataId).id,g=Ui[c];if(g==null)throw new Error(`${c} activation not yet supported for FusedConv2D in the wasm backend.`);let x=p?n.shape[2]:n.shape[1],b=u?s.shape[1]:s.shape[2],C=yr.assertAndGetBroadcastShape(n.shape.slice(0,-2),s.shape.slice(0,-2)),w=t.makeOutput([...C,x,b],n.dtype),k=t.dataIdMap.get(w.dataId).id,_=new Uint8Array(new Int32Array(n.shape).buffer),E=new Uint8Array(new Int32Array(s.shape).buffer);return yF(m,_,n.shape.length,d,E,s.shape.length,p,u,g,f,h,l||0,k),w}var bF={kernelName:ho,backendName:"wasm",setupFunc:p9,kernelFunc:c9};function Ve(r,e){let t;function o(s){t=s.wasm.cwrap(r,null,["number","number","number"])}function n(s){let{backend:a,inputs:{x:i}}=s,p=a.dataIdMap.get(i.dataId).id,u=a.makeOutput(i.shape,e||i.dtype),c=a.dataIdMap.get(u.dataId).id;return y.sizeFromShape(u.shape)===0||t(p,Fe[i.dtype],c),u}return{kernelName:r,backendName:"wasm",setupFunc:o,kernelFunc:n}}var CF=Ve(ys);function rt(r,e,t){let o;function n(a){o=a.wasm.cwrap(r,null,["number","array","number","number","array","number","number","number"])}function s(a){let{backend:i,inputs:p}=a,{a:u,b:c}=p,l=i.dataIdMap.get(u.dataId).id,m=i.dataIdMap.get(c.dataId).id,d=t!=null?t:u.dtype,f=S.assertAndGetBroadcastShape(u.shape,c.shape),h=i.makeOutput(f,d);if(y.sizeFromShape(f)===0)return h;let g=new 
Uint8Array(new Int32Array(u.shape).buffer),x=new Uint8Array(new Int32Array(c.shape).buffer),b=i.dataIdMap.get(h.da
2022-11-18 17:13:29 +01:00
${o.shape}`);if(n.shape.length!==1)throw new Error(`Input shape should be a vector but received shape
2023-01-06 19:23:06 +01:00
${n.shape}`);if(s.shape.length!==1)throw new Error(`Target shape should be a vector but received shape ${s.shape}`);let a=e.dataIdMap.get(o.dataId).id,i=e.dataIdMap.get(n.dataId).id,p=e.dataIdMap.get(s.dataId).id,u=o.shape[0],c=y.sizeFromShape(s.shape),l=e.makeOutput([u,c],o.dtype),m=e.dataIdMap.get(l.dataId).id,d=e.makeOutput([c],s.dtype),f=e.dataIdMap.get(d.dataId).id,h=e.makeOutput([3],"int32"),g=e.dataIdMap.get(h.dataId).id;WP(a,i,p,u,m,f,g);let x=e.readSync(h.dataId),b;switch(x[0]){case 0:{b=S.getSparseReshapeMultipleNegativeOneOutputDimErrorMessage(x[1],x[2]);break}case 1:{b=S.getSparseReshapeNegativeOutputDimErrorMessage(x[1],x[2]);break}case 2:b=S.getSparseReshapeEmptyTensorZeroOutputDimErrorMessage();break;case 3:{let C=Array.from(e.readSync(n.dataId)),w=Array.from(e.readSync(d.dataId));b=S.getSparseReshapeInputOutputMultipleErrorMessage(C,w);break}case 4:{let C=Array.from(e.readSync(n.dataId)),w=Array.from(e.readSync(d.dataId));b=S.getSparseReshapeInputOutputMismatchErrorMessage(C,w);break}default:b=""}if(e.disposeData(h.dataId),b)throw e.disposeData(l.dataId),e.disposeData(d.dataId),new Error(b);return[l,d]}var UP={kernelName:va,backendName:"wasm",setupFunc:fee,kernelFunc:hee};var GP;function Cg(r){GP=r.wasm.cwrap("SparseSegmentReduction",null,["number","number","number","number","number","number","number","number","number"])}function Sg(r,e){let{backend:t,inputs:o}=r,{data:n,indices:s,segmentIds:a}=o,i=s.shape[0],p=t.readSync(a.dataId,i-1,i)[0],c=i>0?p+1:0;if(c<0)throw new Error(S.getSparseSegmentReductionNegativeSegmentIdsErrorMessage());let l=n.shape.slice();l[0]=c;let m=t.dataIdMap.get(n.dataId).id,d=t.dataIdMap.get(s.dataId).id,f=t.dataIdMap.get(a.dataId).id,h=t.makeOutput(l,n.dtype),g=t.dataIdMap.get(h.dataId).id,x=t.makeOutput([4],"int32"),b=t.dataIdMap.get(x.dataId).id;GP(m,Fe[n.dtype],n.shape[0],d,f,g,b,e,0);let C=t.readSync(x.dataId),w;switch(C[0]){case 0:{w=S.getSparseSegmentReductionNegativeSegmentIdsErrorMessage();break}case 
1:{w=S.getSparseSegmentReductionNonIncreasingSegmentIdsErrorMessage();break}case 2:w=S.getSparseSegmentReductionSegmentIdOutOfRangeErrorMessage(C[1],C[2]);break;case 3:w=S.getSparseSegmentReductionIndicesOutOfRangeErrorMessage(C[1],C[2],C[3]);break;default:w=""}if(t.disposeData(x.dataId),w)throw t.disposeData(h.dataId),new Error(w);return h}function gee(r){return Sg(r,!0)}var HP={kernelName:gi,backendName:"wasm",setupFunc:Cg,kernelFunc:gee};function xee(r){return Sg(r,!1)}var KP={kernelName:xi,backendName:"wasm",setupFunc:Cg,kernelFunc:xee};function yee(r){let{inputs:e,attrs:t,backend:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=t,i=y.parseAxisParam(a,n.shape)[0],p=S.prepareSplitSize(n,s,i),u=new Array(n.shape.length).fill(0),c=n.shape.slice();return p.map(l=>{let m=[...c];m[i]=l;let d=Eo({inputs:{x:n},attrs:{begin:u,size:m},backend:o});return u[i]+=l,d})}var qP={kernelName:Rs,backendName:"wasm",kernelFunc:yee};var jP=Ve(Hn);var XP=Ve(bi);var bee=!0,YP=rt(jn,bee);var QP;function Cee(r){QP=r.wasm.cwrap(fo,null,["number","number","number","number"])}function See(r){let{backend:e,inputs:t,attrs:o}=r,{alpha:n}=o,{x:s}=t,a=e.dataIdMap.get(s.dataId).id,i=e.makeOutput(s.shape,s.dtype),p=e.dataIdMap.get(i.dataId).id;return QP(a,n,Fe[s.dtype],p),i}var ZP={kernelName:fo,backendName:"wasm",setupFunc:Cee,kernelFunc:See};var JP;function wee(r){JP=r.wasm.cwrap(Xn,null,["number","array","number","array","array","array","array","array","number","number"])}function Iee(r){let{backend:e,inputs:t,attrs:o}=r,{x:n}=t,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:d,finalShape:f,isIdentity:h,sliceDim0:g,isSimpleSlice:x,begin:b,end:C,strides:w}=ut.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=Lt({inputs:{x:n},backend:e,attrs:{shape:f}});else if(g||x){y.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let 
_=ut.computeOutShape(b,C,w),E=Eo({inputs:{x:n},backend:e,attrs:{begin:b,size:_}});k=Lt({inputs:{x:E},backend:e,attrs:{shape:f}}),e.disposeData(E.dataId)}else{let _=e.makeOutput(d,"float32"),E=
{
var oldValue = 0;
loop {
let newValueF32 = bitcast<f32>(oldValue) + (${e});
let newValue = bitcast<i32>(newValueF32);
let res = atomicCompareExchangeWeak(${r}, oldValue, newValue);
if res.exchanged {
break;
}
oldValue = res.old_value;
}
}`;var M3=(r,e,t,o)=>{let n={dtype:o.dtype,shape:o.shape},s=Yee(t,n,e),a=r.createShaderModule({code:s,label:e.constructor.name});return r.createComputePipeline({compute:{module:a,entryPoint:"_start"},label:e.constructor.name,layout:"auto"})};function Rt(r){if(r<=1)return"i32";if(r===2)return"vec2<i32>";if(r===3)return"vec3<i32>";if(r===4)return"vec4<i32>";if(r===5)return"vec5";if(r===6)return"vec6";throw Error(`GPU for rank ${r} is not yet supported`)}function Ao(r){if(r===0)return"x";if(r===1)return"y";if(r===2)return"z";if(r===3)return"w";if(r===4)return"u";if(r===5)return"v";throw Error(`Index ${r} is not yet supported`)}function Q(...r){let e;switch(r.length){case 0:e=`
2022-11-18 17:13:29 +01:00
fn main()
`;break;case 1:e=`
fn main(${r[0]} : i32)
2023-01-06 19:23:06 +01:00
`;break;default:throw Error("Unreachable")}return e}function D3(r,e){let t;return t=`
// Compute entry point: stores the builtin invocation ids in the
// module-scope private variables, then calls the generated main, passing
// the flat global index when the program's main takes an index parameter.
${Xee(e)}
fn _start(@builtin(local_invocation_id) LocalId : vec3<u32>,
          @builtin(global_invocation_id) GlobalId : vec3<u32>,
          @builtin(local_invocation_index) LocalIndex: u32,
          @builtin(workgroup_id) WorkgroupId : vec3<u32>,
          @builtin(num_workgroups) NumWorkgroups : vec3<u32>) {
  localId = LocalId;
  localIndex = LocalIndex;
  globalId = GlobalId;
  numWorkgroups = NumWorkgroups;
  workgroupId = WorkgroupId;
  ${r?"main(getGlobalIndex());":"main();"};
}
`,t}function Xee(r){return`
@compute @workgroup_size(${r.workgroupSize[0]}, ${r.workgroupSize[1]}, ${r.workgroupSize[2]})
`}function Yee(r,e,t){let o=[],n=t.workgroupSize[0]*t.workgroupSize[1]*t.workgroupSize[2];if(o.push(`
// Module-scope copies of the compute builtins, assigned by the _start
// entry point so helper functions can read them.
var<private> localId: vec3<u32>;
var<private> localIndex: u32;
var<private> globalId: vec3<u32>;
var<private> numWorkgroups: vec3<u32>;
var<private> workgroupId: vec3<u32>;

// Only used when the y/z dimension of workgroup size is 1.
// Flat dispatches use globalId.x directly; otherwise the index is the
// linearised workgroup id times the workgroup size plus the local index.
fn getGlobalIndex() -> i32 {
  ${B3(t)?" return i32(globalId.x);":` return i32((workgroupId.z * numWorkgroups.x * numWorkgroups.y +
        workgroupId.y * numWorkgroups.x + workgroupId.x) * ${n}u +
        localIndex);
`}
}
`),t.isFromPixels){o.push(`
// Uniform block and bindings used when the program is a fromPixels
// program: binding 0 is the output buffer, binding 2 the uniforms.
struct Uniform {
  size : i32,
  numChannels : i32,
  outShapeStrides : vec2<i32>,
};

@group(0) @binding(0) var<storage, read_write> result: array<${vc(e.dtype,t.isVec4)}>;
@group(0) @binding(2) var<uniform> uniforms: Uniform;
`);let f=P3(t);return[F3,o.join(`
`),O3(e.shape),t.getUserCode(),D3(f,t)].join(`
2022-11-20 22:20:02 +01:00
`)}let s="struct Uniforms { NAN : f32, INFINITY : f32, ";t.variableNames.forEach((f,h)=>{let g=Rt(r[h].shape.length);s+=`${f.charAt(0).toLowerCase()+f.slice(1)}Shape : ${g}, `});let a=Rt(e.shape.length);s+=`outShape : ${a}, `;let i=e.shape.length-1,p=Rt(i);s+=`
2023-01-06 19:23:06 +01:00
outShapeStrides: ${p}, `,t.size&&(s+="size : i32, "),t.uniforms&&(s+=t.uniforms),s+="};",s=nte(s),o.push(s),t.atomic?o.push(`
2022-11-18 17:13:29 +01:00
@group(0) @binding(0) var<storage, read_write> result: array<atomic<i32>>;
`):o.push(`
2023-01-06 19:23:06 +01:00
@group(0) @binding(0) var<storage, read_write> result: array<${vc(e.dtype,t.isVec4)}>;
2022-11-20 22:20:02 +01:00
`),t.variableNames.forEach((f,h)=>{o.push(`
2023-01-06 19:23:06 +01:00
@group(0) @binding(${1+h}) var<storage, read> ${f}: array<${t.variableTypes?t.variableTypes[h]:vc(r[h].dtype,t.isVec4)}>;
2022-11-20 22:20:02 +01:00
`)}),s!==""&&o.push(`
2022-11-18 17:13:29 +01:00
@group(0) @binding(${1+t.variableNames.length}) var<uniform> uniforms: Uniforms;
2023-01-06 19:23:06 +01:00
`);let u=tte(e.shape,t.dispatchLayout),c=[F3,o.join(`
`)+Qee,O3(e.shape),u,rte(e.shape.length)];t.atomic||c.push(ote(e.shape,e.dtype,t.isVec4));let l=r.map((f,h)=>ete(f,e.shape,t.variableTypes?t.variableTypes[h]==="vec4<f32>":t.isVec4,t.dispatchLayout.x.length===e.shape.length)).join(`
`);c.push(l),c.push(t.getUserCode());let m=P3(t);return c.push(D3(m,t)),c.join(`
`)}function L3(r,e,t,o){let n=r.shaderKey;if(r.isFromPixels)return n;let s=t.map(c=>c.dtype).concat(o.dtype),a=t.map(c=>S.getBroadcastDims(c.shape,o.shape)),i=t.map(c=>y.arraysEqual(c.shape,o.shape)).join("_"),p=a.map(c=>c.join("_")).join(";"),u=B3(r)?"flatDispatch":"";return n+="_"+(r.workgroupSize?r.workgroupSize.join(","):"")+e.map(c=>c.length).join(",")+s.join(",")+r.variableNames.join(",")+p+i+u,n}var F3=`
// WGSL vectors stop at four components, so rank-5 and rank-6 coordinates
// are carried in plain structs with matching field names.
struct vec5 {x: i32, y: i32, z: i32, w: i32, u: i32};
struct vec6 {x: i32, y: i32, z: i32, w: i32, u: i32, v: i32};
// Reports whether every component of coord lies in [0, shape).
fn coordsInBounds2D(coord : vec2<i32>, shape : vec2<i32>) -> bool {
  let nonNegative : bool = all(coord >= vec2<i32>(0));
  let belowExtent : bool = all(coord < shape);
  return nonNegative && belowExtent;
}
// Reports whether every component of coord lies in [0, shape).
fn coordsInBounds3D(coord : vec3<i32>, shape : vec3<i32>) -> bool {
  let nonNegative : bool = all(coord >= vec3<i32>(0));
  let belowExtent : bool = all(coord < shape);
  return nonNegative && belowExtent;
}
// Reports whether every component of coord lies in [0, shape).
fn coordsInBounds4D(coord : vec4<i32>, shape : vec4<i32>) -> bool {
  let nonNegative : bool = all(coord >= vec4<i32>(0));
  let belowExtent : bool = all(coord < shape);
  return nonNegative && belowExtent;
}
// Rank-1 case: the flat index is the coordinate itself; shape is accepted
// but not read.
fn getIndexFromCoords1D(coord : i32, shape : i32) -> i32 {
return coord;
}
// Row-major flat index of a rank-2 coordinate.
fn getIndexFromCoords2D(coords : vec2<i32>, shape : vec2<i32>) -> i32 {
  return coords.x * shape.y + coords.y;
}
// Row-major flat index of a rank-3 coordinate, accumulated axis by axis.
fn getIndexFromCoords3D(coords : vec3<i32>, shape : vec3<i32>) -> i32 {
  let rowIndex : i32 = coords.x * shape.y + coords.y;
  return rowIndex * shape.z + coords.z;
}
// Row-major flat index of a rank-4 coordinate, accumulated axis by axis.
fn getIndexFromCoords4D(coords : vec4<i32>, shape : vec4<i32>) -> i32 {
  var flat : i32 = coords.x;
  flat = flat * shape.y + coords.y;
  flat = flat * shape.z + coords.z;
  flat = flat * shape.w + coords.w;
  return flat;
}
// Row-major flat index of a rank-5 coordinate, accumulated axis by axis.
fn getIndexFromCoords5D(coords : vec5, shape : vec5) -> i32 {
  var flat : i32 = coords.x;
  flat = flat * shape.y + coords.y;
  flat = flat * shape.z + coords.z;
  flat = flat * shape.w + coords.w;
  flat = flat * shape.u + coords.u;
  return flat;
}
// Row-major flat index of a rank-6 coordinate, accumulated axis by axis.
fn getIndexFromCoords6D(coords : vec6, shape : vec6) -> i32 {
  var flat : i32 = coords.x;
  flat = flat * shape.y + coords.y;
  flat = flat * shape.z + coords.z;
  flat = flat * shape.w + coords.w;
  flat = flat * shape.u + coords.u;
  flat = flat * shape.v + coords.v;
  return flat;
}
// Integer division that rounds toward negative infinity when the caller
// signals a negative result (sign < 0) and the division is inexact;
// otherwise it is the plain truncating quotient.
fn idiv(a: i32, b: i32, sign: f32) -> i32 {
  let quotient: i32 = a / b;
  let inexact: bool = (a % b) != 0;
  let roundDown: bool = sign < 0. && inexact;
  return select(quotient, quotient - 1, roundDown);
}
// NaN definition in IEEE 754-1985:
//   - sign bit: either 0 or 1
//   - biased exponent: all bits set
//   - fraction: anything except all zero bits (all zeros encodes infinity)
// https://en.wikipedia.org/wiki/IEEE_754-1985#Representation_of_non-numbers
// With the sign bit masked off, a NaN is any pattern above that of +infinity.
fn isnan(val: f32) -> bool {
  let magnitudeBits: u32 = bitcast<u32>(val) & 0x7fffffffu;
  return magnitudeBits > 0x7f800000u;
}
// Component-wise NaN test: with the sign bit masked off, a lane is NaN when
// its bit pattern is greater than that of +infinity (0x7f800000).
fn isnanVec4(val : vec4<f32>) -> vec4<bool> {
  let floatToUint: vec4<u32> = bitcast<vec4<u32>>(val);
  return (floatToUint & vec4<u32>(0x7fffffffu)) > vec4<u32>(0x7f800000u);
}
`,Qee=`
// True for +/- infinity; compares against the INFINITY field of the
// uniforms block rather than a shader literal.
fn isinf(val: f32) -> bool {
  return abs(val) == uniforms.INFINITY;
}
`;function O3(r){let e=r.length;if(e<=1)return"fn getCoordsFromIndex(index : i32) -> i32 { return index; }";let t=y.computeStrides(r),o=Rt(e),n=[];for(let a=0;a<e;a++)n.push(`d${a}`);if(t.length===1)return` fn getCoordsFromIndex(index : i32) -> vec2<i32> {
2022-11-18 17:13:29 +01:00
let d0 = index / uniforms.outShapeStrides; let d1 = index - d0 * uniforms.outShapeStrides;
return vec2<i32>(d0, d1);
2023-01-06 19:23:06 +01:00
}`;let s;return s="var index2 = index;"+t.map((a,i)=>{let p=`let ${n[i]} = index2 / uniforms.outShapeStrides.${Ao(i)}`,u=i===t.length-1?`let ${n[i+1]} = index2 - ${n[i]} * uniforms.outShapeStrides.${Ao(i)}`:`index2 = index2 - ${n[i]} * uniforms.outShapeStrides.${Ao(i)}`;return`${p}; ${u};`}).join(""),`
2022-11-18 17:13:29 +01:00
fn getCoordsFromIndex(index : i32) -> ${o} {
${s}
return ${o}(${n.join(",")});
}
2023-01-06 19:23:06 +01:00
`}function Zee(r,e){let t=r.name,o=r.shape.length,n=Rt(o),s="get"+t.charAt(0).toUpperCase()+t.slice(1),a=["d0","d1","d2","d3","d4","d5"].slice(0,o),i=a.map(c=>`${c} : i32`).join(", ");if(o<1)return e?`
2022-11-18 17:13:29 +01:00
fn ${s}() -> vec4<f32> {
return vec4<f32>(${t}[0]);
}
`:`
fn ${s}() ->f32 {
return f32(${t}[0]);
}
`;let p=`uniforms.${t.charAt(0).toLowerCase()+t.slice(1)}Shape`,u=`${o}D`;return o===0&&(u="1D"),e?`
fn ${s}(${i}) -> vec4<f32> {
return vec4<f32>(${t}[getIndexFromCoords${u}(${n}(${a.join(",")}),
${p}) / 4]);
}
`:`
fn ${s}(${i}) -> f32 {
return f32(${t}[getIndexFromCoords${u}(${n}(${a.join(",")}),
${p})]);
}
2023-01-06 19:23:06 +01:00
`}function Jee(r,e,t,o){let n=r.name,s=n.charAt(0).toUpperCase()+n.slice(1),a="get"+s+"ByOutput",i=r.shape.length,p=e.length,u=Rt(p);if(y.arraysEqual(r.shape,e)&&o)return t?`
2022-11-18 17:13:29 +01:00
fn ${a}Index(globalIndex : i32) -> vec4<f32> {
return vec4<f32>(${n}[globalIndex]);
}
fn ${a}Coords(coords : ${u}) -> vec4<f32> {
return vec4<f32>(${n}[${p>1?"getOutputIndexFromCoords(coords)":"coords"} / 4]);
}
`:`
fn ${a}Index(globalIndex : i32) -> f32 {
return f32(${n}[globalIndex]);
}
fn ${a}Coords(coords : ${u}) -> f32 {
return f32(${n}[${p>1?"getOutputIndexFromCoords(coords)":"coords"}]);
}
2022-11-20 22:20:02 +01:00
`;let c=S.getBroadcastDims(r.shape,e),l=p-i,m="";if(i===0)return t?`
2022-11-18 17:13:29 +01:00
fn ${a}Index(globalIndex : i32) -> vec4<f32> {
return get${s}();
}
fn ${a}Coords(coords : ${u}) -> vec4<f32> {
return get${s}();
}
`:`
fn ${a}Index(globalIndex : i32) -> f32{
return get${s}();
}
fn ${a}Coords(coords : ${u}) -> f32{
return get${s}();
}
2023-01-06 19:23:06 +01:00
`;p<2&&c.length>=1?m="coords = 0;":m=c.map(g=>`coords.${Ao(g+l)} = 0;`).join(`
`);let d="";if(p<2&&i>0)d="coords";else if(p>1){let g=Rt(i),x=r.shape.map((b,C)=>`coords.${Ao(C+l)}`).join(", ");d=`${g}(${x})`}else d="coords";let f=`uniforms.${n.charAt(0).toLowerCase()+n.slice(1)}Shape`,h=`${i}D`;return t?`
2022-11-18 17:13:29 +01:00
fn ${a}Index(globalIndex : i32) -> vec4<f32> {
var coords = getCoordsFromIndex(globalIndex);
${m}
2022-11-20 22:20:02 +01:00
return ${n}[getIndexFromCoords${h}(${d}, ${f}) / 4];
2022-11-18 17:13:29 +01:00
}
fn ${a}Coords(coordsIn : ${u}) -> vec4<f32> {
var coords = coordsIn;
${m}
2022-11-20 22:20:02 +01:00
return ${n}[getIndexFromCoords${h}(${d}, ${f}) / 4];
2022-11-18 17:13:29 +01:00
}
`:`
fn ${a}Index(globalIndex : i32) -> f32 {
var coords = getCoordsFromIndex(globalIndex);
${m}
2022-11-20 22:20:02 +01:00
return f32(${n}[getIndexFromCoords${h}(${d}, ${f})]);
2022-11-18 17:13:29 +01:00
}
fn ${a}Coords(coordsIn : ${u}) -> f32 {
var coords = coordsIn;
${m}
2022-11-20 22:20:02 +01:00
return f32(${n}[getIndexFromCoords${h}(${d}, ${f})]);
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
`}function ete(r,e,t,o){let n=Zee(r,t);return r.shape.length<=e.length&&(n+=Jee(r,e,t,o)),n}function tte(r,e){let{x:t,y:o=[],z:n=[]}=e,s=r.length,a=t.length+o.length+n.length;if(a!==s)return"";if(t.length===s)return`fn getOutputCoords() -> ${Rt(s)}{
2022-11-18 17:13:29 +01:00
let globalIndex = getGlobalIndex();
return getCoordsFromIndex(globalIndex);
}
2023-01-06 19:23:06 +01:00
`;let i="",p=[t,o,n];for(let m=0;m<p.length;m++){let d=p[m];if(d.length!==0)if(d.length===1)i+=`let d${d[0]} = i32(globalId[${m}]);`;else{let f=R3(d,"uniforms.outShape");i+=`var index${m} = i32(globalId[${m}]);`;for(let h=0;h<f.length;h++)i+=`let d${d[h]} = index${m} / ${f[h]};`,h===f.length-1?i+=`let d${d[h+1]} = index${m} - d${d[h]} * ${f[h]};`:i+=`index${m} = index${m} - d${d[h]} * ${f[h]};`}}let u=[];for(let m=0;m<a;m++)u.push(`d${m}`);let c=Rt(a),l=`fn getOutputCoords() -> ${c} {
2022-11-18 17:13:29 +01:00
${i}
2023-01-06 19:23:06 +01:00
`;return u.length===0?l+=`return ${c}(0); }`:l+=`return ${c}(${u.join(",")}); }`,l}function rte(r){let e="";switch(r){case 0:case 1:e+=`
2022-11-18 17:13:29 +01:00
fn getOutputIndexFromCoords(coords : i32) -> i32 {
return coords;
}
`;break;case 2:e+=`
fn getOutputIndexFromCoords(coords : vec2<i32>) -> i32 {
return dot(coords, vec2<i32>(uniforms.outShapeStrides, 1));
}
`;break;case 3:e+=`
fn getOutputIndexFromCoords(coords : vec3<i32>) -> i32 {
return dot(coords, vec3<i32>(uniforms.outShapeStrides.x, uniforms.outShapeStrides.y, 1));
}
`;break;case 4:e+=`
fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
return dot(coords, vec4<i32>(
uniforms.outShapeStrides.x, uniforms.outShapeStrides.y, uniforms.outShapeStrides.z, 1));
}
`;break;case 5:e+=`
fn getOutputIndexFromCoords(coords : vec5) -> i32 {
return coords.x * uniforms.outShapeStrides.x +
coords.y * uniforms.outShapeStrides.y +
coords.z * uniforms.outShapeStrides.z +
coords.w * uniforms.outShapeStrides.w +
coords.u;
}
`;break;case 6:e+=`
fn getOutputIndexFromCoords(coords : vec6) -> i32 {
return coords.x * uniforms.outShapeStrides.x +
coords.y * uniforms.outShapeStrides.y +
coords.z * uniforms.outShapeStrides.z +
coords.w * uniforms.outShapeStrides.w +
coords.u * uniforms.outShapeStrides.u +
coords.v;
}
2023-01-06 19:23:06 +01:00
`;break;default:y.assert(!1,()=>`Unsupported ${r}D shape`);break}return e}function B3(r){return r.dispatch[1]===1&&r.dispatch[2]===1}function vc(r,e){return r==="float32"?e?"vec4<f32>":"f32":r==="int32"||r==="bool"?e?"vec4<i32>":"i32":r}function ote(r,e,t){let o=r.length,n=vc(e,t),s;if(t?s=`fn setOutputAtIndex(flatIndex : i32, value : vec4<f32>) {
2022-11-18 17:13:29 +01:00
result[flatIndex] = ${n}(value);
}
fn setOutputAtIndexI32(flatIndex : i32, value : vec4<i32>) {
result[flatIndex] = ${n}(value);
}`:s=`fn setOutputAtIndex(flatIndex : i32, value : f32) {
result[flatIndex] = ${n}(value);
}
fn setOutputAtIndexI32(flatIndex : i32, value : i32) {
result[flatIndex] = ${n}(value);
2022-11-20 22:20:02 +01:00
}`,o>=2){let a=["d0","d1","d2","d3","d4","d5"].slice(0,o),i=Rt(o);t?s+=`
2022-11-18 17:13:29 +01:00
fn setOutputAtCoords(${a.map(p=>`${p} : i32`).join(", ")}, value : vec4<f32>) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndex(flatIndex / 4, value);
}
fn setOutputAtCoordsI32(${a.map(p=>`${p} : i32`).join(", ")}, value : vec4<i32>) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndexI32(flatIndex / 4, value);
}
`:s+=`
fn setOutputAtCoords(${a.map(p=>`${p} : i32`).join(", ")}, value : f32) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndex(flatIndex, value);
}
fn setOutputAtCoordsI32(${a.map(p=>`${p} : i32`).join(", ")}, value : i32) {
let flatIndex = getOutputIndexFromCoords(${i}(${a.join(", ")}));
setOutputAtIndexI32(flatIndex, value);
}
2023-01-06 19:23:06 +01:00
`}return s}function nte(r){let e=/(\w+)\s*:\s*vec(5|6)/g;r=r.replace(e,o=>"@align(16) "+o);let t=/vec(5|6)\s*,\s*(\w+)/g;return r=r.replace(t,(o,n,s)=>`vec${n}, @align(16) ${s}`),r}function P3(r){return!(r.dispatchLayout.hasOwnProperty("y")&&r.dispatchLayout.y.length!==0||r.dispatchLayout.hasOwnProperty("z")&&r.dispatchLayout.z.length!==0)}var hI={};Ge(hI,{GPUBytesPerElement:()=>_g,MatMulProgramType:()=>Ro,assertNotComplex:()=>fI,computeDispatch:()=>Y,computeWorkPerThreadForConv2d:()=>Vl,computeWorkgroupInfoForMatMul:()=>dI,computeWorkgroupSizeForConv2d:()=>Bl,flatDispatchLayout:()=>ae,isWebGPUSupported:()=>zl,tilesFitEvenlyIntoShape:()=>ate});var Ku=r=>{let e=1;for(let t=0;t<r.length;t++)e*=r[t];return e};function ate(r,e){if(r.length!==e.length)throw new Error(`Cannot compute whether rank ${r.length} tiles fit evenly into rank ${e.length} shape - ranks must match.`);return e.every((t,o)=>t%r[o]===0)}function Y(r,e,t=[1,1,1],o=[1,1,1]){let[n,s,a]=[Math.ceil(Ku(r.x.map(i=>e[i]))/(t[0]*o[0])),r.y?Math.ceil(Ku(r.y.map(i=>e[i]))/(t[1]*o[1])):1,r.z?Math.ceil(Ku(r.z.map(i=>e[i]))/(t[2]*o[2])):1];return[n,s,a]}function dI(r,e,t,o=!1){let n=[8,8,1],s=[4,4,1];return o||(r<=8&&(s[1]=1),e<=16&&t<=16&&(n[0]=4)),{workgroupSize:n,elementsPerThread:s}}function Bl(r,e,t=!1){if(t)return[8,8,1];let o=Ku(r.x.map(s=>e[s])),n=Ku(r.y.map(s=>e[s]));return o<=4?[4,16,1]:n<=4?[16,4,1]:[16,16,1]}function Vl(r,e,t=!1){if(t)return[4,4,1];let o=Ku(r.x.map(s=>e[s])),n=Ku(r.y.map(s=>e[s]));return o<=4?[1,2,1]:n<=4?[2,1,1]:[2,2,1]}function ae(r){return{x:r.map((e,t)=>t)}}function _g(r){if(r==="float32"||r==="int32"||r==="bool"||r==="string")return 4;if(r==="complex64")return 8;throw new Error(`Unknown dtype ${r}`)}function zl(){return(typeof window!="undefined"||typeof WorkerGlobalScope!="undefined")&&!!navigator.gpu}function fI(r,e){Array.isArray(r)||(r=[r]),r.forEach(t=>{t!=null&&y.assert(t.dtype!=="complex64",()=>`${e} does not support complex64 tensors in the WebGPU backend.`)})}var 
Ro;(function(r){r[r.MatMulReduceProgram=0]="MatMulReduceProgram",r[r.MatMulSplitKProgram=1]="MatMulSplitKProgram",r[r.MatMulSmallOutputSizeProgram=2]="MatMulSmallOutputSizeProgram",r[r.MatMulPackedProgram=3]="MatMulPackedProgram",r[r.MatMulMax=4]="MatMulMax"})(Ro||(Ro={}));var ite=O().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"),ute=(r,e)=>{let t=r.limits.maxComputeWorkgroupsPerDimension,o=e.dispatchLayout,n=e.dispatch;if(n.every(a=>a<=t))return n;y.assert(n[0]>t&&o.y===void 0&&o.z===void 0,()=>"Dispatch size exceeds WebGPU limits in Y or Z dimension.");let s=Math.ceil(Math.sqrt(n[0]));return s>t?(s=Math.ceil(Math.cbrt(n[0])),y.assert(s<=t,()=>"Total dispatch size exceeds WebGPU maximum."),[s,s,s]):[s,s,1]},Gi=class extends Zr{constructor(e,t){if(super(),this.commandQueueOwnedIds=new WeakSet,this.dispatchNumberInEncoder=0,this.disposed=!1,this.downloadWaitMs=0,this.tensorDataPendingDisposal=[],this.stagingPendingDisposal=[],this.uniformPendingDisposal=[],this.uploadWaitMs=0,!zl())throw new Error("WebGPU is not supported on this device");this.pipelineCache={},this.device=e,this.queue=e.queue,this.currentCommandEncoder=null,this.currentComputePass=null,this.supportTimeQuery=e.features.has("timestamp-query-inside-passes"),this.adapterInfo=new kg(t),this.thresholdToIncreaseWorkgroups=this.adapterInfo.intelGPUGeneration>=12?16:8,this.bufferManager=new Ng(this.device),this.textureManager=new Tg(this.device),this.tensorMap=new Fo(this,sr()),this.supportTimeQuery&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:2})),O().getBool("WEBGPU_USE_PROFILE_TOOL")&&(this.dummyCanvas=document.createElement("canvas"),this.dummyCanvas.width=1,this.dummyCanvas.height=1,this.dummyContext=this.dummyCanvas.getContext("webgpu"),this.dummyContext.configure({device:e,format:"bgra8unorm"}),document.body.appendChild(this.dummyCanvas))}nextDataId(){return Gi.nextDataId++}floatPrecision(){return 32}defaultGpuBufferUsage(){return 
GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}disposeData(e,t=!1){if(this.tensorDataPendingDisposal.indexOf(e)>=0)return!1;i
2022-11-18 17:13:29 +01:00
if (isnan(a)) { return a; }
if (isnan(b)) { return b; }
2023-01-06 19:23:06 +01:00
`,$g=`
resultTemp = select(
resultTemp, vec4<f32>(valueForNaN),
vec4<bool>(isNaN) | isnanVec4(a) | isnanVec4(b));
`,pte="return a + b;",cte="return areal * breal - aimag * bimag;",lte="return areal * bimag + aimag * breal;",mte="return a / b;",dte="return f32(a == b);",fte="return vec4<f32>(a == b);",hte="return f32(a > b);",gte="return vec4<f32>(a > b);",xte="return f32(a >= b);",yte="return vec4<f32>(a >= b);",bte=`
2022-11-18 17:13:29 +01:00
let s = sign(a) * sign(b);
let ia = i32(round(a));
let ib = i32(round(b));
return f32(idiv(ia, ib, s));
2023-01-06 19:23:06 +01:00
`,Cte=`
2022-11-18 17:13:29 +01:00
let ia = vec4<i32>(round(a));
let ib = vec4<i32>(round(b));
let cond = ib != vec4<i32>(0);
var resultTemp = vec4<i32>(0);
let s = sign(a) * sign(b);
// Windows (D3D) wants guaranteed non-zero int division at compile-time.
if (cond[0]) {
resultTemp[0] = idiv(ia[0], ib[0], s[0]);
}
if (cond[1]) {
resultTemp[1] = idiv(ia[1], ib[1], s[1]);
}
if (cond[2]) {
resultTemp[2] = idiv(ia[2], ib[2], s[2]);
}
if (cond[3]) {
resultTemp[3] = idiv(ia[3], ib[3], s[3]);
}
return vec4<f32>(resultTemp);
2023-01-06 19:23:06 +01:00
`,Ste="return f32(a < b);",wte="return vec4<f32>(a < b);",Ite="return f32(a <= b);",vte="return vec4<f32>(a <= b);",kte="return f32(a >= 1.0 && b >= 1.0);",Nte=`return (vec4<f32>(a >= vec4<f32>(1.0)) *
vec4<f32>(b >= vec4<f32>(1.0)));`,Tte="return f32(a >= 1.0 || b >= 1.0);",_te=`return min(vec4<f32>(a >= vec4<f32>(1.0)) +
vec4<f32>(b >= vec4<f32>(1.0)), vec4<f32>(1.0));`,$te=`
${V3}
2022-11-20 22:20:02 +01:00
if (b == 0.) {
return uniforms.NAN;
}
var resultTemp = a % b;
if ((a < 0. && b < 0.) || (a >= 0. && b > 0.)) {
return resultTemp;
} else {
return (resultTemp + b) % b;
}
2023-01-06 19:23:06 +01:00
`,Ete=`
let isNaN = !vec4<bool>(b);
2022-11-20 22:20:02 +01:00
let valueForNaN = uniforms.NAN;
var resultTemp = vec4<f32>(a % b);
2023-01-06 19:23:06 +01:00
${$g}
2022-11-20 22:20:02 +01:00
if (!((a[0] < 0. && b[0] < 0.) || (a[0] >= 0. && b[0] > 0.))) {
resultTemp[0] = (resultTemp[0] + b[0]) % b[0];
}
if (!((a[1] < 0. && b[1] < 0.) || (a[1] >= 0. && b[1] > 0.))) {
resultTemp[1] = (resultTemp[1] + b[1]) % b[1];
}
if (!((a[2] < 0. && b[2] < 0.) || (a[2] >= 0. && b[2] > 0.))) {
resultTemp[2] = (resultTemp[2] + b[2]) % b[2];
}
if (!((a[3] < 0. && b[3] < 0.) || (a[3] >= 0. && b[3] > 0.))) {
resultTemp[3] = (resultTemp[3] + b[3]) % b[3];
}
return resultTemp;
2023-01-06 19:23:06 +01:00
`,Ate="return a * b;",Rte=`
2022-11-18 17:13:29 +01:00
if (isnan(a) || isnan(b)) {
return 1.0;
}
return f32(a != b);
2023-01-06 19:23:06 +01:00
`,Dte=`
2022-11-18 17:13:29 +01:00
var resultTemp = vec4<f32>(a != b);
let valueForNaN = 1.0;
2023-01-06 19:23:06 +01:00
${$g}
2022-11-18 17:13:29 +01:00
return resultTemp;
2023-01-06 19:23:06 +01:00
`,Fte=`
2022-11-18 17:13:29 +01:00
if(a < 0.0 && floor(b) < b) {
return uniforms.NAN;
}
if (b == 0.0) {
return 1.0;
}
if (round(abs(b) % 2.0) != 1.0) {
return pow(abs(a), b);
}
return sign(a) * pow(abs(a), b);
2023-01-06 19:23:06 +01:00
`,Ote=`
2022-11-18 17:13:29 +01:00
let isModRound1Bool = vec4<i32>(round(abs(b) % vec4<f32>(2.0))) == vec4<i32>(1);
let isModRound1 = vec4<f32>(isModRound1Bool);
let multiplier = sign(a) * isModRound1 + (vec4<f32>(1.0) - isModRound1);
var resultTemp = multiplier * pow(abs(a), b);
// Ensure that a^0 = 1, including 0^0 = 1 as this correspond to TF and JS
let isExpZero = b == vec4<f32>(0.0);
if (isExpZero.r) {
resultTemp.r = 1.0;
}
if (isExpZero.g) {
resultTemp.g = 1.0;
}
if (isExpZero.b) {
resultTemp.b = 1.0;
}
if (isExpZero.a) {
resultTemp.a = 1.0;
}
let isNaN = (a < vec4<f32>(0.0)) & (floor(b) < b);
let valueForNaN = uniforms.NAN;
2023-01-06 19:23:06 +01:00
${$g}
2022-11-18 17:13:29 +01:00
return resultTemp;
2023-01-06 19:23:06 +01:00
`,Pte="if (a < 0.0) { return b * a; } return a;",Mte=`
2022-11-18 17:13:29 +01:00
let aLessThanZero = vec4<f32>(a < vec4<f32>(0.0));
return (aLessThanZero * (b * a)) + ((vec4<f32>(1.0) - aLessThanZero) * a);
2023-01-06 19:23:06 +01:00
`,Lte="return (a - b) * (a - b);",Bte="return a - b;";function gI(r,e,t="uniforms.NAN"){let o=e?$g:V3;return e?`
2022-11-18 17:13:29 +01:00
let valueForNaN = ${t};
var resultTemp = vec4<f32>(${r}(a, b));
`+o+`
return resultTemp;
`:o+`
return ${r}(a, b);
2023-01-06 19:23:06 +01:00
`}function kc(r,e){switch(r){case ge.ADD:return pte;case ge.ATAN2:return gI("atan2",e);case ge.COMPLEX_MULTIPLY_IMAG:return lte;case ge.COMPLEX_MULTIPLY_REAL:return cte;case ge.DIV:return mte;case ge.EQUAL:return e?fte:dte;case ge.GREATER:return e?gte:hte;case ge.GREATER_EQUAL:return e?yte:xte;case ge.INT_DIV:return e?Cte:bte;case ge.LESS:return e?wte:Ste;case ge.LESS_EQUAL:return e?vte:Ite;case ge.LOGICAL_AND:return e?Nte:kte;case ge.LOGICAL_OR:return e?_te:Tte;case ge.MAX:return gI("max",e);case ge.MIN:return gI("min",e);case ge.MOD:return e?Ete:$te;case ge.MUL:return Ate;case ge.NOT_EQUAL:return e?Dte:Rte;case ge.POW:return e?Ote:Fte;case ge.PRELU:return e?Mte:Pte;case ge.SQUARED_DIFFERENCE:return Lte;case ge.SUB:return Bte;default:throw new Error(`BinaryType ${r} is not implemented!`)}}var X;(function(r){r[r.ABS=0]="ABS",r[r.ACOS=1]="ACOS",r[r.ACOSH=2]="ACOSH",r[r.ASIN=3]="ASIN",r[r.ASINH=4]="ASINH",r[r.ATAN=5]="ATAN",r[r.ATANH=6]="ATANH",r[r.CEIL=7]="CEIL",r[r.COS=8]="COS",r[r.COSH=9]="COSH",r[r.ELU=10]="ELU",r[r.ERF=11]="ERF",r[r.EXP=12]="EXP",r[r.EXPM1=13]="EXPM1",r[r.FLOOR=14]="FLOOR",r[r.IS_FINITE=15]="IS_FINITE",r[r.IS_INF=16]="IS_INF",r[r.IS_NAN=17]="IS_NAN",r[r.LINEAR=18]="LINEAR",r[r.LOG=19]="LOG",r[r.LOG1P=20]="LOG1P",r[r.LOGICAL_NOT=21]="LOGICAL_NOT",r[r.NEG=22]="NEG",r[r.RELU=23]="RELU",r[r.RELU6=24]="RELU6",r[r.LEAKYRELU=25]="LEAKYRELU",r[r.RECIPROCAL=26]="RECIPROCAL",r[r.ROUND=27]="ROUND",r[r.RSQRT=28]="RSQRT",r[r.SELU=29]="SELU",r[r.SIGMOID=30]="SIGMOID",r[r.SIGN=31]="SIGN",r[r.SIN=32]="SIN",r[r.SINH=33]="SINH",r[r.SOFTPLUS=34]="SOFTPLUS",r[r.SQRT=35]="SQRT",r[r.SQUARE=36]="SQUARE",r[r.STEP=37]="STEP",r[r.TAN=38]="TAN",r[r.TANH=39]="TANH",r[r.TO_INT=40]="TO_INT"})(X||(X={}));var Vte="return abs(a);",zte=`
2022-11-20 22:20:02 +01:00
if (abs(a) > 1.) {
return uniforms.NAN;
}
return acos(a);
2023-01-06 19:23:06 +01:00
`,Wte=`
2022-11-20 22:20:02 +01:00
if (a < 1.) {
return uniforms.NAN;
}
return acosh(a);
2023-01-06 19:23:06 +01:00
`,Ute=`
2022-11-20 22:20:02 +01:00
if (abs(a) > 1.) {
return uniforms.NAN;
}
return asin(a);
2023-01-06 19:23:06 +01:00
`,Gte="return asinh(a);",Hte=`
2022-11-20 22:20:02 +01:00
if (isnan(a)) {
return uniforms.NAN;
}
return atan(a);
2023-01-06 19:23:06 +01:00
`,Kte=`
2022-11-20 22:20:02 +01:00
if (abs(a) > 1.) {
return uniforms.NAN;
}
if (a == 1.) {
return uniforms.INFINITY;
}
if (a == -1.) {
return -uniforms.INFINITY;
}
return atanh(a);
2023-01-06 19:23:06 +01:00
`,qte="return ceil(a);",jte="return cos(a);",Xte=`
2022-11-18 17:13:29 +01:00
let e2x = exp(-a);
return (e2x + 1.0 / e2x) / 2.0;
2023-01-06 19:23:06 +01:00
`,Yte="return exp(a) - 1.0;",Qte="if (a >= 0.0) { return a; } return (exp(a) - 1.0);",Zte=`
2022-11-18 17:13:29 +01:00
var resFloat = exp(a) - vec4<f32>(1.0);
if (a.r >= 0.0) {
resFloat.r = a.r;
}
if (a.g >= 0.0) {
resFloat.g = a.g;
}
if (a.b >= 0.0) {
resFloat.b = a.b;
}
if (a.a >= 0.0) {
resFloat.a = a.a;
}
return resFloat;
2023-01-06 19:23:06 +01:00
`,Jte=`
2022-11-20 22:20:02 +01:00
// Error function is calculated approximately with elementary function.
// See "Handbook of Mathematical Functions with Formulas,
// Graphs, and Mathematical Tables", Abramowitz and Stegun.
let p = ${S.ERF_P};
let a1 = ${S.ERF_A1};
let a2 = ${S.ERF_A2};
let a3 = ${S.ERF_A3};
let a4 = ${S.ERF_A4};
let a5 = ${S.ERF_A5};
let sign = sign(a);
let absA = abs(a);
let t = 1.0 / (1.0 + p * absA);
return sign * (1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-absA * absA));
2023-01-06 19:23:06 +01:00
`,ere="return exp(a);",tre="return floor(a);",rre="return f32(!isnan(a) && !isinf(a));",ore="return f32(isinf(a));",nre="return f32(isnan(a));",sre="return a;",are=`if (a < 0.0) { return uniforms.NAN; }
return log(a);`,ire=`
2022-11-20 22:20:02 +01:00
if (isnan(a)) { return a; }
return log(1.0 + a);
2023-01-06 19:23:06 +01:00
`,ure="return f32(!(a >= 1.0));",pre="return -a;",cre="if (a < 0.0) { return uniforms.alpha * a; } return a;",lre=`
2022-11-18 17:13:29 +01:00
let aLessThanZero = vec4<f32>(a < vec4<f32>(0.0));
return (aLessThanZero * (uniforms.alpha * a)) + ((vec4<f32>(1.0) - aLessThanZero) * a);
2023-01-06 19:23:06 +01:00
`,mre="return 1.0 / a;",dre="return select(a, 0.0, a < 0.0);",fre="return clamp(a, 0.0, 6.0);",hre="return clamp(a, vec4<f32>(0.0, 0.0, 0.0, 0.0), vec4<f32>(6.0, 6.0, 6.0, 6.0));",gre=`
2022-11-18 17:13:29 +01:00
return select(a, vec4<f32>(0.0), a < vec4<f32>(0.0));
2023-01-06 19:23:06 +01:00
`,xre="return round(a);",yre="return inverseSqrt(a);",bre=`
if (a >= 0.0) {
return ${S.SELU_SCALE} * a;
} else {
return ${S.SELU_SCALEALPHA} * (exp(a) - 1.0);
}
`,Cre="return 1.0 / (1.0 + exp(-1.0 * a));",Sre="return sign(a);",wre="return sin(a);",Ire=`
2022-11-18 17:13:29 +01:00
let e2x = exp(a);
return (e2x - 1.0 / e2x) / 2.0;
2023-01-06 19:23:06 +01:00
`,vre=`
let epsilon = 1.1920928955078125e-7;
let threshold = log(epsilon) + 2.0;
let too_large = a > -threshold;
let too_small = a < threshold;
let exp_a = exp(a);
if (too_large) {
return a;
} else if (too_small) {
return exp_a;
} else {
return log(exp_a + 1.0);
}
`,kre="return sqrt(a);",Nre="return a * a;",Tre=`
if (isnan(a)) {
return a;
}
return select(uniforms.stepAlpha, 1.0, a > 0.0);
`,_re="return tan(a);",$re=`
2022-11-18 17:13:29 +01:00
let e2x = exp(-2.0 * abs(a));
return sign(a) * (1.0 - e2x) / (1.0 + e2x);
2023-01-06 19:23:06 +01:00
`,Ere="return f32(i32((a)));";function ja(r,e){switch(r){case X.ABS:return Vte;case X.ACOS:return zte;case X.ACOSH:return Wte;case X.ASIN:return Ute;case X.ASINH:return Gte;case X.ATAN:return Hte;case X.ATANH:return Kte;case X.COS:return jte;case X.COSH:return Xte;case X.CEIL:return qte;case X.ELU:return e?Zte:Qte;case X.ERF:return Jte;case X.EXP:return ere;case X.EXPM1:return Yte;case X.FLOOR:return tre;case X.IS_FINITE:return rre;case X.IS_INF:return ore;case X.IS_NAN:return nre;case X.LINEAR:return sre;case X.LOG:return are;case X.LOG1P:return ire;case X.LOGICAL_NOT:return ure;case X.NEG:return pre;case X.LEAKYRELU:return e?lre:cre;case X.RECIPROCAL:return mre;case X.RELU:return e?gre:dre;case X.RELU6:return e?hre:fre;case X.ROUND:return xre;case X.RSQRT:return yre;case X.SELU:return bre;case X.SIGMOID:return Cre;case X.SIGN:return Sre;case X.SIN:return wre;case X.SINH:return Ire;case X.SOFTPLUS:return vre;case X.SQRT:return kre;case X.SQUARE:return Nre;case X.STEP:return Tre;case X.TAN:return _re;case X.TANH:return $re;case X.TO_INT:return Ere;default:throw new Error(`BinaryType ${r} is not implemented!`)}}var kt=r=>{switch(r){case 1:return"f32";case 2:return"vec2<f32>";case 3:return"vec3<f32>";case 4:return"vec4<f32>";default:throw new Error(`${r}-component is not supported.`)}};function pr(r,e=!1,t=!1,o=3){if(r===null)return"";let n="";if(r==="linear")n=ja(X.LINEAR);else if(r==="relu")n=ja(X.RELU,t);else if(r==="elu")n=ja(X.ELU,t);else if(r==="relu6")n=ja(X.RELU6,t);else if(r==="prelu")n=kc(ge.PRELU,t);else if(r==="sigmoid")n=ja(X.SIGMOID,t);else if(r==="leakyrelu")n=ja(X.LEAKYRELU,t);else throw new Error(`Activation ${r} has not been implemented for the WebGPU backend.`);let a=kt(t?4:1),i="";return e?i=`
2022-11-18 17:13:29 +01:00
fn activation(a : ${a}, coords : vec${o}<i32>) -> ${a} {
let b = getPreluActivationWeightsByOutputCoords(coords);
${n}
}`:i=`
fn activation(a : ${a}, coords : vec${o}<i32>) -> ${a} {
${n}
2023-01-06 19:23:06 +01:00
}`,i}function Gr(r,e){return`
2022-11-18 17:13:29 +01:00
${r?"value = value + getBiasByOutputCoords(coords);":""}
${e?"value = activation(value, coords);":""}
2023-01-06 19:23:06 +01:00
`}function xI(r,e,t=!1,o=!1,n=!1,s=1){y.assert(r&&s===1||!r,()=>`transposeA ${r} is not compatible with component size ${s}`);let a=`
${r?"value = getA(batch, col, row);":"value = getA(batch, row, col);"}
`,i=e?"value = getB(batch, col, row);":"value = getB(batch, row, col);";return`
fn mm_readA(batch: i32, row: i32, colIn: i32) -> ${kt(s)} {
var value = ${kt(s)}(0.0);
let col = colIn * ${s};
${t&&n?a:`
${r?"if(row < uniforms.dimAOuter && col < uniforms.dimInner)":"if(row < uniforms.aShape[1] && col < uniforms.aShape[2])"}
2022-11-18 17:13:29 +01:00
{
2023-01-06 19:23:06 +01:00
${a}
2022-11-18 17:13:29 +01:00
}
`}
return value;
}
2023-01-06 19:23:06 +01:00
fn mm_readB(batch: i32, row: i32, colIn: i32) -> ${kt(s)} {
let col = colIn * ${s};
var value = ${kt(s)}(0.0);
${i}
2022-11-18 17:13:29 +01:00
return value;
}
2023-01-06 19:23:06 +01:00
`}function Wl(r,e,t,o,n=!1,s=!1,a=!1,i=1){return`
${xI(t,o,n,s,a,i)}
fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${kt(i)}) {
let col = colIn * ${i};
${n&&s?"":"if (row < uniforms.dimAOuter && col < uniforms.dimBOuter)"}
2022-11-18 17:13:29 +01:00
{
var value = valueIn;
let coords = vec3<i32>(batch, row, col);
2023-01-06 19:23:06 +01:00
${Gr(r,e)}
2022-11-18 17:13:29 +01:00
setOutputAtCoords(coords[0], coords[1], coords[2], value);
}
}
2023-01-06 19:23:06 +01:00
`}var Are=(r,e)=>r?`
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
2023-01-06 19:23:06 +01:00
globalRowStart / ${e} + inputCol);
2022-11-18 17:13:29 +01:00
`:`
2023-01-06 19:23:06 +01:00
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
globalRow + innerRow,
2023-01-06 19:23:06 +01:00
kStart / ${e} + inputCol);
`,Rre=(r,e,t)=>r?`
let ACached0 = mm_Asub[k * ${e}][localRow];
let ACached1 = mm_Asub[k * ${e} + 1][localRow];
let ACached2 = mm_Asub[k * ${e} + 2][localRow];
${e===3?"":`let ACached3 = mm_Asub[k * ${e} + 3][localRow];`}
for (var i = 0; i < ${t}; i++) {
2022-11-18 17:13:29 +01:00
acc[i] = BCached0 * ACached0[i] + acc[i];
acc[i] = BCached1 * ACached1[i] + acc[i];
acc[i] = BCached2 * ACached2[i] + acc[i];
${e===3?"":"acc[i] = BCached3 * ACached3[i] + acc[i];"}
}`:`
2023-01-06 19:23:06 +01:00
for (var i = 0; i < ${t}; i++) {
2022-11-18 17:13:29 +01:00
let ACached = mm_Asub[tileRow + i][k];
acc[i] = BCached0 * ACached.x + acc[i];
acc[i] = BCached1 * ACached.y + acc[i];
acc[i] = BCached2 * ACached.z + acc[i];
${e===3?"":"acc[i] = BCached3 * ACached.w + acc[i];"}
2023-01-06 19:23:06 +01:00
}`;function qu(r,e,t=!1,o=32,n=!1,s=32,a=!1,i=!1){let p=e[1]*r[1],u=e[0]*r[0],c=t?p:o,l=t?o:p,m=c/e[0],d=o/e[1],f=r[1];return y.assert((t&&m===4&&r[1]===4||!t&&(m===3||m===4))&&c%e[0]===0&&o%e[1]===0&&r[0]===4,()=>`If transposeA ${t} is true, innerElementSize ${m} and workPerThread[1] ${r[1]} must be 4.
Otherwise, innerElementSize ${m} must be 3 or 4.
tileAWidth ${c} must be divisible by workgroupSize[0]${e[0]}. tileInner ${o} must be divisible by workgroupSize[1] ${e[1]}. colPerThread ${r[0]} must be 4.`),`
var<workgroup> mm_Asub : array<array<vec${m}<f32>, ${c/m}>, ${l}>;
var<workgroup> mm_Bsub : array<array<vec4<f32>, ${u/r[0]}>, ${o}>;
${Q()} {
2022-11-18 17:13:29 +01:00
let localRow = i32(localId.y);
2023-01-06 19:23:06 +01:00
let tileRow = ${a?"0":`localRow * ${f}`};
2022-11-18 17:13:29 +01:00
let tileCol = i32(localId.x);
2023-01-06 19:23:06 +01:00
let globalRow = ${a?"0":`i32(globalId.y) * ${f}`};
2022-11-18 17:13:29 +01:00
let globalCol = i32(globalId.x);
let batch = ${n?"0":"i32(globalId.z)"};
2023-01-06 19:23:06 +01:00
let batchA = ${n||!i?"batch":"batch % uniforms.aShape[0]"};
let batchB = ${n||!i?"batch":"batch % uniforms.bShape[0]"};
let globalRowStart = i32(workgroupId.y) * ${p};
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
let numTiles = ${n?`${Math.ceil(s/o)}`:`(uniforms.dimInner - 1) / ${o} + 1`};
2022-11-18 17:13:29 +01:00
var kStart = ${n?`i32(globalId.z) * ${s}`:"0"};
2023-01-06 19:23:06 +01:00
var acc: array<vec4<f32>, ${f}>;
2022-11-18 17:13:29 +01:00
// Loop over shared dimension.
2023-01-06 19:23:06 +01:00
let tileRowB = localRow * ${d};
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${f}; innerRow++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRow + innerRow;
let inputCol = tileCol;
2023-01-06 19:23:06 +01:00
${Are(t,m)}
2022-11-18 17:13:29 +01:00
}
// Load one tile of B into local memory.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${d}; innerRow++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRowB + innerRow;
let inputCol = tileCol;
2023-01-06 19:23:06 +01:00
mm_Bsub[inputRow][inputCol] = mm_readB(batchB, kStart + inputRow, globalCol);
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
kStart = kStart + ${o};
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-01-06 19:23:06 +01:00
for (var k = 0; k < ${o/m}; k++) {
let BCached0 = mm_Bsub[k * ${m}][tileCol];
let BCached1 = mm_Bsub[k * ${m} + 1][tileCol];
let BCached2 = mm_Bsub[k * ${m} + 2][tileCol];
${m===3?"":`let BCached3 = mm_Bsub[k * ${m} + 3][tileCol];`}
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
${Rre(t,m,f)}
2022-11-18 17:13:29 +01:00
}
workgroupBarrier();
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${f}; innerRow++) {
2022-11-18 17:13:29 +01:00
mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]);
}
2023-01-06 19:23:06 +01:00
}`}var z3=r=>r?`
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
globalRowStart + inputCol);
`:`
2023-01-06 19:23:06 +01:00
mm_Asub[inputRow][inputCol] = mm_readA(batchA,
2022-11-18 17:13:29 +01:00
globalRowStart + inputRow,
kStart + inputCol);
2023-01-06 19:23:06 +01:00
`,Dre=r=>r?"let ACached = mm_Asub[k][tileRow + innerRow];":"let ACached = mm_Asub[tileRow + innerRow][k];";function ju(r,e,t=!1,o=32,n=!1,s=32,a=!1,i=!1){let p=r[1]*e[1],u=r[0]*e[0],c=t?p:o,l=t?o:p;y.assert(l%e[1]===0&&c%e[0]===0&&o%e[1]===0,()=>`tileAHight ${l} must be divisible by workgroupSize[1]${e[1]}, tileAWidth ${c} must be divisible by workgroupSize[0]${e[0]}, tileInner ${o} must be divisible by workgroupSize[1]${e[1]}`);let m=l/e[1],d=c/e[0],f=o/e[1],h=r[1],g=r[0],x=a?`
2022-11-18 17:13:29 +01:00
let localRow = i32(localId.y);
let localCol = i32(localId.x);
2023-01-06 19:23:06 +01:00
let globalRowStart = i32(workgroupId.y) * ${p};
let globalColStart = i32(workgroupId.x) * ${u};
2022-11-18 17:13:29 +01:00
// Loop over shared dimension.
2023-01-06 19:23:06 +01:00
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-01-06 19:23:06 +01:00
for (var inputRow = localRow; inputRow < ${l}; inputRow = inputRow + ${e[1]}) {
for (var inputCol = localCol; inputCol < ${c}; inputCol = inputCol + ${e[0]}) {
${z3(t)}
2022-11-18 17:13:29 +01:00
}
}
// Load one tile of B into local memory.
for (var inputRow = localRow; inputRow < ${o}; inputRow = inputRow + ${e[1]}) {
2023-01-06 19:23:06 +01:00
for (var inputCol = localCol; inputCol < ${u}; inputCol = inputCol + ${e[0]}) {
mm_Bsub[inputRow][inputCol] = mm_readB(batchB,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
globalColStart + inputCol);
}
}
2023-01-06 19:23:06 +01:00
kStart = kStart + ${o};
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-01-06 19:23:06 +01:00
var BCached : array<f32, ${g}>;
for (var k = 0; k < ${o}; k++) {
for (var inner = 0; inner < ${g}; inner++) {
2022-11-18 17:13:29 +01:00
BCached[inner] = mm_Bsub[k][localCol + inner * ${e[0]}];
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
2022-11-18 17:13:29 +01:00
let ACached = ${t?`mm_Asub[k][localRow + innerRow * ${e[1]}];`:`mm_Asub[localRow + innerRow * ${e[1]}][k];`}
2023-01-06 19:23:06 +01:00
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
acc[innerRow][innerCol] = acc[innerRow][innerCol] +
ACached * BCached[innerCol];
}
}
}
workgroupBarrier();
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
2022-11-18 17:13:29 +01:00
let gRow = globalRowStart + localRow + innerRow * ${e[1]};
2023-01-06 19:23:06 +01:00
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
let gCol = globalColStart + localCol + innerCol * ${e[0]};
mm_write(batch, gRow, gCol, acc[innerRow][innerCol]);
}
}
`:`
2023-01-06 19:23:06 +01:00
let tileRow = i32(localId.y) * ${h};
let tileCol = i32(localId.x) * ${g};
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
let globalRow = i32(globalId.y) * ${h};
let globalCol = i32(globalId.x) * ${g};
let globalRowStart = i32(workgroupId.y) * ${p};
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
let tileRowA = i32(localId.y) * ${m};
let tileColA = i32(localId.x) * ${d};
let tileRowB = i32(localId.y) * ${f};
2022-11-18 17:13:29 +01:00
// Loop over shared dimension.
2023-01-06 19:23:06 +01:00
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${m}; innerRow++) {
for (var innerCol = 0; innerCol < ${d}; innerCol++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRowA + innerRow;
let inputCol = tileColA + innerCol;
2023-01-06 19:23:06 +01:00
${z3(t)}
2022-11-18 17:13:29 +01:00
}
}
// Load one tile of B into local memory.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${f}; innerRow++) {
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
let inputRow = tileRowB + innerRow;
let inputCol = tileCol + innerCol;
2023-01-06 19:23:06 +01:00
mm_Bsub[inputRow][inputCol] = mm_readB(batchB,
2022-11-18 17:13:29 +01:00
kStart + inputRow,
globalCol + innerCol);
}
}
2023-01-06 19:23:06 +01:00
kStart = kStart + ${o};
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-01-06 19:23:06 +01:00
var BCached : array<f32, ${g}>;
for (var k = 0; k < ${o}; k++) {
for (var inner = 0; inner < ${g}; inner++) {
2022-11-18 17:13:29 +01:00
BCached[inner] = mm_Bsub[k][tileCol + inner];
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
${Dre(t)}
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
acc[innerRow][innerCol] = acc[innerRow][innerCol] + ACached * BCached[innerCol];
}
}
}
workgroupBarrier();
}
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
mm_write(batch, globalRow + innerRow, globalCol + innerCol,
acc[innerRow][innerCol]);
}
}
`;return`
2023-01-06 19:23:06 +01:00
var<workgroup> mm_Asub : array<array<f32, ${c}>, ${l}>;
var<workgroup> mm_Bsub : array<array<f32, ${u}>, ${o}>;
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
${Q()} {
2022-11-18 17:13:29 +01:00
let batch = ${n?"0":"i32(globalId.z)"};
2023-01-06 19:23:06 +01:00
let batchA = ${n||!i?"batch":"batch % uniforms.aShape[0]"};
let batchB = ${n||!i?"batch":"batch % uniforms.bShape[0]"};
let numTiles = ${n?`${Math.ceil(s/o)}`:`(uniforms.dimInner - 1) / ${o} + 1`};
2022-11-18 17:13:29 +01:00
var kStart = ${n?`i32(globalId.z) * ${s}`:"0"};
2023-01-06 19:23:06 +01:00
var acc : array<array<f32, ${g}>, ${h}>;
2022-11-18 17:13:29 +01:00
// Without this initialization strange values show up in acc.
2023-01-06 19:23:06 +01:00
for (var innerRow = 0; innerRow < ${h}; innerRow++) {
for (var innerCol = 0; innerCol < ${g}; innerCol++) {
2022-11-18 17:13:29 +01:00
acc[innerRow][innerCol] = 0.0;
}
}
2023-01-06 19:23:06 +01:00
${x}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
`}var Fre=r=>r?`
mm_readA(batchA, colA, globalRow),
mm_readA(batchA, colA + 1, globalRow),
mm_readA(batchA, colA + 2, globalRow),
mm_readA(batchA, colA + 3, globalRow)
2022-11-18 17:13:29 +01:00
`:`
2023-01-06 19:23:06 +01:00
mm_readA(batchA, globalRow, colA),
mm_readA(batchA, globalRow, colA + 1),
mm_readA(batchA, globalRow, colA + 2),
mm_readA(batchA, globalRow, colA + 3)
`;function Ore(r,e=!1){y.assert(r[1]===1&&r[2]===1,()=>`A linear work group size is required. But got ${r}.`);let t=r[0]*4;return`
2022-11-18 17:13:29 +01:00
var<workgroup> mm_Asub : array<vec4<f32>, ${r[0]}>;
2023-01-06 19:23:06 +01:00
${Q()} {
2022-11-18 17:13:29 +01:00
let tileCol = i32(localId.x);
let globalCol = i32(globalId.x);
let globalRow = i32(globalId.y);
2023-01-06 19:23:06 +01:00
let numTiles = (uniforms.dimInner - 1) / ${t} + 1;
2022-11-18 17:13:29 +01:00
let batch = i32(globalId.z);
2023-01-06 19:23:06 +01:00
let batchA = batch % uniforms.aShape[0];
let batchB = batch % uniforms.bShape[0];
2022-11-18 17:13:29 +01:00
// Without this initialization strange values show up in acc.
var acc = 0.0;
// Loop over shared dimension.
2023-01-06 19:23:06 +01:00
for (var t = 0; t < numTiles; t++) {
2022-11-18 17:13:29 +01:00
// Load one tile of A into local memory.
2023-01-06 19:23:06 +01:00
let colA = t * ${t} + tileCol * 4;
mm_Asub[tileCol] = vec4<f32>(${Fre(e)});
2022-11-18 17:13:29 +01:00
workgroupBarrier();
// Compute acc values for a single thread.
2023-01-06 19:23:06 +01:00
for (var k = 0; k < ${t/4}; k++) {
let rowB = t * ${t} + k * 4;
let BCached = vec4<f32>(mm_readB(batchB, rowB, globalCol),
mm_readB(batchB, rowB + 1, globalCol),
mm_readB(batchB, rowB + 2, globalCol),
mm_readB(batchB, rowB + 3, globalCol));
2022-11-18 17:13:29 +01:00
let ACached = mm_Asub[k];
acc = acc + dot(ACached, BCached);
}
workgroupBarrier();
}
mm_write(batch, globalRow, globalCol, acc);
}
2023-01-06 19:23:06 +01:00
`}var Eg=class{constructor(e,t,o=!1,n=!1,s=null,a=null,i=null,p=!1){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=t,this.dispatchLayout={x:[2],y:[1],z:[0]};let u=o?e[1]:e[2];if(this.isVec4=(u%4===0&&!o||t[1]%4===0&&o)&&t[2]%4===0&&!n,this.isVectorA=t[1]===1&&!o,!this.isVec4&&this.isVectorA)this.elementsPerThread=[1,1,1],this.workgroupSize=[32,1,1];else{let m=dI(t[1],u,t[2],o);this.workgroupSize=m.workgroupSize,this.elementsPerThread=m.elementsPerThread}this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,this.elementsPerThread);let c=s!=null,l=i!=null;c&&this.variableNames.push("bias"),l&&this.variableNames.push("preluActivationWeights"),this.sequentialAccessByThreads=p,this.transposeA=o,this.transposeB=n,this.addBias=c,this.activation=a,this.hasPreluActivationWeights=l,[this.fitAOuter,this.fitBOuter,this.fitInner]=this.getShapeFit(t[1],t[2],u),this.shaderKey=`matMulPacked_${this.elementsPerThread}_${o}_${n}_${this.activation}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.isVectorA}_${this.sequentialAccessByThreads}`}getShapeFit(e,t,o){let n=this.workgroupSize[1]*this.elementsPerThread[1],s=this.workgroupSize[0]*this.elementsPerThread[0];!this.isVec4&&this.isVectorA?this.tileInner=this.workgroupSize[0]*4:this.tileInner=s;let a=e%n===0,i=t%s===0,p=o%this.tileInner===0;return[a,i,p]}getUserCode(){return`
${pr(this.activation,this.hasPreluActivationWeights,this.isVec4)}
${Wl(this.addBias,this.activation,!1,this.transposeB,this.fitAOuter,this.fitBOuter,this.fitInner,this.isVec4?4:1)}
${this.isVec4?qu(this.elementsPerThread,this.workgroupSize,this.transposeA,this.tileInner,!1,null,this.isVectorA,!0):this.isVectorA?Ore(this.workgroupSize,this.transposeA):ju(this.elementsPerThread,this.workgroupSize,this.transposeA,this.tileInner,!1,null,this.sequentialAccessByThreads,!0)}
`}};function Pre(r){return`
var<workgroup> sumValues : array<f32, ${r}>;
${Q()} {
2022-11-18 17:13:29 +01:00
let coords = getOutputCoords();
let batch = coords[0];
2023-01-06 19:23:06 +01:00
let batchA = batch % uniforms.aShape[0];
let batchB = batch % uniforms.bShape[0];
2022-11-18 17:13:29 +01:00
let row = coords[1];
let col = coords[2];
var sum = 0.0;
let Length = uniforms.dimInner;
2023-01-06 19:23:06 +01:00
for (var k = i32(localId.x); k < Length; k = k + ${r}) {
let dataA = mm_readA(batchA, row, k);
let dataB = mm_readB(batchB, k, col);
2022-11-18 17:13:29 +01:00
sum = sum + dataA * dataB;
}
sumValues[localId.x] = sum;
workgroupBarrier();
2023-01-06 19:23:06 +01:00
for(var currentSize = ${r/2}u; currentSize > 1u;
2022-11-18 17:13:29 +01:00
currentSize = currentSize / 2u) {
if (localId.x < currentSize)
{
sumValues[localId.x] = sumValues[localId.x] + sumValues[localId.x + currentSize];
}
workgroupBarrier();
}
if (localId.x == 0u) {
sum = sumValues[0] + sumValues[1];
mm_write(batch, row, col, sum);
}
}
2023-01-06 19:23:06 +01:00
`}var Ag=class{constructor(e,t=!1,o=!1,n=null,s=null,a=null){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workgroupSize=[256,1,1],this.outputShape=e,this.dispatchLayout={x:[],y:[1,2],z:[0]},this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize);let i=n!=null,p=a!=null;i&&this.variableNames.push("bias"),p&&this.variableNames.push("preluActivationWeights"),this.transposeA=t,this.transposeB=o,this.addBias=i,this.activation=s,this.hasPreluActivationWeights=p,this.shaderKey=`matMulReduce_${this.activation}_${t}_${o}`}getUserCode(){return`
${pr(this.activation,this.hasPreluActivationWeights)}
${Wl(this.addBias,this.activation,this.transposeA,this.transposeB)}
${Pre(this.workgroupSize[0])}
`}};function Mre(r){let e=r[1],t=r[0],o=e>t?e:t;return`
2022-11-18 17:13:29 +01:00
var<workgroup> mm_Asub : array<array<f32, ${o}>, ${e}>;
var<workgroup> mm_Bsub : array<array<f32, ${t}>, ${o}>;
// If the output size is small for matrix multiplication, avoid to use vec4
// and handle some elements per thread to optimally utilize the ALU.
// Read data from global memory to registers firstly, then store them into
// shared memory, so it is instruction-Level parallelism for arithmetic
// operations and others handle IO operations between barrier api, makes ALU
// and load/store units work simultaneously, could improves the performance.
2023-01-06 19:23:06 +01:00
${Q()} {
2022-11-18 17:13:29 +01:00
let tileRow = i32(localId.y);
let tileCol = i32(localId.x);
let globalRow = i32(globalId.y);
let globalCol = i32(globalId.x);
let batch = i32(globalId.z);
2023-01-06 19:23:06 +01:00
let batchA = batch % uniforms.aShape[0];
let batchB = batch % uniforms.bShape[0];
2022-11-18 17:13:29 +01:00
// uniforms.dimInner should be greater than 0.
let numTiles = (uniforms.dimInner - 1) / ${o} + 1;
var acc = 0.0;
var globalColA = tileCol;
var globalRowB = 0;
2023-01-06 19:23:06 +01:00
var regA = mm_readA(batchA, globalRow, globalColA);
var regB0 = mm_readB(batchB, globalRowB + 2 * tileRow, globalCol);
var regB1 = mm_readB(batchB, globalRowB + 2 * tileRow + 1, globalCol);
2022-11-18 17:13:29 +01:00
globalColA = globalColA + ${o};
globalRowB = globalRowB + ${o};
for (var t = 0; t < numTiles; t = t + 1) {
mm_Asub[tileRow][tileCol] = regA;
mm_Bsub[2 * tileRow][tileCol] = regB0;
mm_Bsub[2 * tileRow + 1][tileCol] = regB1;
workgroupBarrier();
2023-01-06 19:23:06 +01:00
regA = mm_readA(batchA, globalRow, globalColA);
regB0 = mm_readB(batchB, globalRowB + 2 * tileRow, globalCol);
regB1 = mm_readB(batchB, globalRowB + 2 * tileRow + 1, globalCol);
2022-11-18 17:13:29 +01:00
globalColA = globalColA + ${o};
globalRowB = globalRowB + ${o};
for (var k = 0; k < ${o}; k = k + 1) {
acc = acc + mm_Asub[tileRow][k] * mm_Bsub[k][tileCol];
}
workgroupBarrier();
}
mm_write(batch, globalRow, globalCol, acc);
}
2023-01-06 19:23:06 +01:00
`}var Rg=class{constructor(e,t,o,n=!1,s=!1,a=null,i=null,p=null){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workgroupSize=[16,8,1],this.outputShape=o,this.dispatchLayout={x:[2],y:[1],z:[0]},this.dispatch=[Math.ceil(o[2]/this.workgroupSize[0]),Math.ceil(o[1]/this.workgroupSize[1]),o[0]];let u=a!=null;u&&this.variableNames.push("bias");let c=p!=null;c&&this.variableNames.push("preluActivationWeights"),this.transposeA=n,this.transposeB=s,this.addBias=u,this.activation=i,this.hasPreluActivationWeights=c,this.shaderKey=`matMulSmallOutputSize_${this.activation}_${n}_${s}`}getUserCode(){return`
${pr(this.activation,this.hasPreluActivationWeights)}
${Wl(this.addBias,this.activation,this.transposeA,this.transposeB)}
${Mre(this.workgroupSize)}
`}};var Dg=class{constructor(e,t,o=!1,n=!1){this.variableNames=["A","B"],this.uniforms="dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.workgroupSize=[8,8,1],this.atomic=!0,this.isVec4=!1,this.splitedDimInner=128,y.assert(e[0]===1,()=>"MatMulSplitKProgram only supports batch = 1."),this.outputShape=e,this.dispatchLayout={x:[2],y:[1],z:[0,3]},this.isVec4=(o&&this.outputShape[1]%4===0||!o&&t%4===0)&&this.outputShape[2]%4===0,this.elementsPerThread=[4,4,this.splitedDimInner],this.isVec4||(this.outputShape[1]<16&&(this.elementsPerThread[1]=1),this.outputShape[2]<16&&(this.elementsPerThread[0]=1)),this.dispatch=Y(this.dispatchLayout,[this.outputShape[0],this.outputShape[1],this.outputShape[2],t],this.workgroupSize,this.elementsPerThread),this.transposeA=o,this.transposeB=n,this.shaderKey=`matMulSplitK_${o}_${n}_${this.elementsPerThread}_${this.isVec4}`}getUserCode(){let e=this.isVec4?4:1;return`
${xI(!1,this.transposeB,!1,!1,!1,e)}
fn mm_write(batch: i32, row : i32, colIn : i32, value : ${kt(e)}) {
let col = colIn * ${e};
2022-11-18 17:13:29 +01:00
if (row < uniforms.dimAOuter && col < uniforms.dimBOuter) {
let coords = vec3<i32>(batch, row, col);
let flatIndex = getOutputIndexFromCoords(coords);
// The problem is that we should initialize output to zero before using.
// Otherwise, the original value will be added to the result.
2023-01-06 19:23:06 +01:00
for (var i = 0; i < ${e}; i = i + 1) {
${Ic("&result[flatIndex + i]",`${e>1?"value[i]":"value"}`,"float32")}
}
2022-11-18 17:13:29 +01:00
}
}
2023-01-06 19:23:06 +01:00
${this.isVec4?qu(this.elementsPerThread,this.workgroupSize,this.transposeA,32,!0,this.splitedDimInner):ju(this.elementsPerThread,this.workgroupSize,this.transposeA,32,!0,this.splitedDimInner)}
`}},Fg=class{constructor(e,t=null,o=null,n=null){this.uniforms="",this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.addBias=t!=null,this.hasPreluActivationWeights=n!=null,this.activation=o,this.addBias&&this.variableNames.push("bias"),this.hasPreluActivationWeights&&this.variableNames.push("preluActivationWeights"),this.shaderKey=`biasActivation_${o}`}getUserCode(){return`
${pr(this.activation,this.hasPreluActivationWeights)}
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
var value = getXByOutputIndex(index);
2023-01-06 19:23:06 +01:00
${Gr(this.addBias,this.activation)}
2022-11-18 17:13:29 +01:00
setOutputAtIndex(index, value);
}
}
2023-01-06 19:23:06 +01:00
`}};var Og=class{constructor(e){this.variableNames=[],this.outputShape=[],this.uniforms="value : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="fill"}getUserCode(){return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
setOutputAtIndex(index, uniforms.value);
}
}
2023-01-06 19:23:06 +01:00
`}};
/**
 * Fill kernel. Infers the dtype from the value when none is given. String
 * tensors are materialised on the CPU with a filled array; every other dtype
 * runs the Og program with the value passed as a float32 uniform.
 */
function fr(r){let{backend:e,attrs:t}=r,{shape:o,value:n}=t,{dtype:s}=t;if(s=s||y.inferDtype(n),s==="string"){let a=y.getArrayFromDType(s,y.sizeFromShape(o));return a.fill(n),e.makeTensorInfo(o,s,a)}else{let a=new Og(o),i=[{type:"float32",data:[n]}];return e.runWebGPUProgram(a,[],s,i)}}
/** Kernel registration entry for fr. */
var W3={kernelName:ws,backendName:"webgpu",kernelFunc:fr};
/**
 * Reshape kernel. Resolves the requested shape against the input's element
 * count, asserts the counts match, bumps the ref count of the input's data and
 * returns a new tensor-info object that shares the same dataId.
 */
function me(r){let{inputs:e,attrs:t}=r,{x:o}=e,{shape:n}=t,s=y.sizeFromShape(o.shape),a=y.inferFromImplicitShape(n,s),i=y.sizeFromShape(a);return y.assert(s===i,()=>`The new shape (${a}) has ${i} elements and the old shape (${o.shape}) has ${s} elements. The new shape and old shape must have the same number of elements.`),r.backend.incRef(o.dataId),{dataId:o.dataId,shape:a,dtype:o.dtype}}
/** Kernel registration entry for me. */
var U3={kernelName:_s,backendName:"webgpu",kernelFunc:me};
/**
 * Batched matrix multiplication with optional bias / activation.
 *
 * Steps, in order:
 *  1. Derive inner (l, m) and outer (d, f) dimensions from the last two axes,
 *     honouring transposeA / transposeB, and assert the inner sizes match.
 *  2. Reshape both inputs to rank 3 via me (E, A); they are tracked in R for
 *     disposal.
 *  3. Pick a program: the WEBGPU_MATMUL_PROGRAM_TYPE flag if it is >= 0,
 *     otherwise a heuristic on the output size and inner dimension.
 *  4. Run it. The split-K case first creates a zero-filled output with fr and,
 *     when a bias or activation is present, runs a separate Fg pass and returns
 *     early.
 *  5. Dispose every intermediate in R and return the result reshaped to the
 *     broadcast batch shape + [d, f].
 *
 * @throws Error for an unknown program type; y.assert failure on mismatched
 *     inner shapes.
 */
function Xu({a:r,b:e,transposeA:t,transposeB:o,backend:n,bias:s=null,preluActivationWeights:a=null,leakyreluAlpha:i=0,activation:p=null}){let u=r.shape.length,c=e.shape.length,l=t?r.shape[u-2]:r.shape[u-1],m=o?e.shape[c-1]:e.shape[c-2],d=t?r.shape[u-1]:r.shape[u-2],f=o?e.shape[c-2]:e.shape[c-1],h=r.shape.slice(0,-2),g=e.shape.slice(0,-2),x=y.sizeFromShape(h),b=y.sizeFromShape(g),w=yr.assertAndGetBroadcastShape(r.shape.slice(0,-2),e.shape.slice(0,-2)).concat([d,f]);y.assert(l===m,()=>`Error in matMul: inner shapes (${l}) and (${m}) of Tensors with shapes ${r.shape} and ${e.shape} and transposeA=${t} and transposeB=${o} must match.`);let k=t?[x,l,d]:[x,d,l],_=o?[b,f,m]:[b,m,f],E=me({inputs:{x:r},backend:n,attrs:{shape:k}}),A=me({inputs:{x:e},backend:n,attrs:{shape:_}}),R=[E,A],D=Math.max(x,b),P=[E,A],M=[{type:"int32",data:[d]},{type:"int32",data:[f]},{type:"int32",data:[l]}],L,V,z=[D,d,f],U=O().get("WEBGPU_MATMUL_PROGRAM_TYPE");
// A negative flag value means "choose automatically".
if(U<0){let H=O().getNumber("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL"),q=H>0?H:n.thresholdToIncreaseWorkgroups,Z=D*Math.ceil(d/32)*Math.ceil(f/32);Z<=q||d<=8&&Z<=q*2?D*d*f<=128?U=Ro.MatMulReduceProgram:D===1&&m>=2e3?U=Ro.MatMulSplitKProgram:U=Ro.MatMulSmallOutputSizeProgram:U=Ro.MatMulPackedProgram}
switch(U){case Ro.MatMulReduceProgram:L=new Ag(z,t,o,s,p,a);break;case Ro.MatMulSplitKProgram:{if(V=fr({backend:n,attrs:{shape:z,value:0,dtype:r.dtype}}),L=new Dg(z,m,t,o),s||p){V=n.runWebGPUProgram(L,P,r.dtype,M,V);let q=new Fg(V.shape,s,p,a),Z=null,ee=[V];s&&ee.push(s),a&&ee.push(a),p==="leakyrelu"&&(Z=[{type:"float32",data:[i]}],q.uniforms+=" alpha : f32,");let oe=n.runWebGPUProgram(q,ee,V.dtype,Z);R.push(V);let J=me({inputs:{x:oe},backend:n,attrs:{shape:w}});R.push(oe);for(let te of R)n.disposeData(te.dataId);return J}break}case Ro.MatMulSmallOutputSizeProgram:L=new Rg(k,_,z,t,o,s,p,a);break;case Ro.MatMulPackedProgram:let H=n.adapterInfo.isIntel();L=new Eg(k,z,t,o,s,p,a,H);break;default:throw new Error(`Unsupported MatMulProgramType ${U}.`)}
// Optional inputs are appended in the same order the programs push their variable names.
s&&P.push(s),a&&P.push(a),p==="leakyrelu"&&(M.push({type:"float32",data:[i]}),L.uniforms+=" alpha : f32,"),V=n.runWebGPUProgram(L,P,r.dtype,M,V);let K=me({inputs:{x:V},backend:n,attrs:{shape:w}});R.push(V);for(let H of R)n.disposeData(H.dataId);return K}
/** Kernel entry point: unpacks inputs/attrs and delegates to Xu. */
function Lre(r){let{inputs:e,backend:t,attrs:o}=r,{a:n,b:s,bias:a,preluActivationWeights:i}=e,{transposeA:p,transposeB:u,activation:c,leakyreluAlpha:l}=o;return Xu({a:n,b:s,transposeA:p,transposeB:u,backend:t,bias:a,preluActivationWeights:i,leakyreluAlpha:l,activation:c})}
/** Kernel registration entry for Lre. */
var G3={kernelName:ho,backendName:"webgpu",kernelFunc:Lre};var 
Ul=class{constructor(e,t,o){this.variableNames=["AReal","AImag","BReal","BImag"],this.workgroupSize=[128,1,1],this.size=!0,this.outputShape=S.assertAndGetBroadcastShape(t,o),this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey=`binaryOpComplex_${e}`,this.op=e}getUserCode(){return`
2022-11-18 17:13:29 +01:00
fn binaryOpComplex(
areal : f32, aimag : f32, breal : f32, bimag : f32) -> f32 {
2023-01-06 19:23:06 +01:00
${kc(this.op,!1)}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-18 17:13:29 +01:00
if(index < uniforms.size) {
let areal = getARealByOutputIndex(index);
let aimag = getAImagByOutputIndex(index);
let breal = getBRealByOutputIndex(index);
let bimag = getBImagByOutputIndex(index);
setOutputAtIndex(index, binaryOpComplex(areal, aimag, breal, bimag));
}
}
2023-01-06 19:23:06 +01:00
`}};var Yu=class{constructor(e,t,o){this.size=!0,this.variableNames=["A","B"],this.outputShape=S.assertAndGetBroadcastShape(t,o),this.dispatchLayout=ae(this.outputShape),this.op=e,this.useSharedMemoryWithA=t.length<=1&&o.length>1&&t[0]<128,this.useSharedMemoryWithB=o.length<=1&&t.length>1&&o[0]<128,this.useSharedMemoryWithA||this.useSharedMemoryWithB?(this.isVec4=!1,this.lastDimensionSize=this.useSharedMemoryWithB?o[0]:t[0],this.shaderKey=`binary_${this.type}_${e}_${this.lastDimensionSize}_${this.useSharedMemoryWithB}`,this.type="shared",this.workgroupSize=[256,1,1],this.workPerThread=1):(y.arraysEqual(t,o)&&y.sizeFromShape(t)%4===0?(this.isVec4=!0,this.type="vec4",this.workPerThread=4):(this.isVec4=!1,this.type="plain",this.workPerThread=1),this.shaderKey=`binary_${this.type}_${e}`,this.workgroupSize=[128,1,1]),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1])}getUserCode(){let e,t=this.isVec4?"vec4<f32>":"f32",o=`
2022-11-18 17:13:29 +01:00
fn binaryOperation(a : ${t}, b : ${t}) -> ${t} {
2023-01-06 19:23:06 +01:00
let isNaN = false;
{
${kc(this.op,this.isVec4)}
}
2022-11-18 17:13:29 +01:00
};
`;if(this.type==="shared"){let n=this.lastDimensionSize>1?`coords[${this.outputShape.length-1}]`:"0",s=this.useSharedMemoryWithB?`let a = getAByOutputIndex(index);
let b = sharedBuf[${n}];`:`let a = sharedBuf[${n}];
let b = getBByOutputIndex(index);`;e=`
${o}
var<workgroup> sharedBuf : array<f32, ${this.lastDimensionSize}>;
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-18 17:13:29 +01:00
// Fill in the shared memory buffer.
let localIndex = i32(localId.x);
if(localIndex < ${this.lastDimensionSize}) {
sharedBuf[localIndex] = f32(${this.useSharedMemoryWithB?"B":"A"}[localIndex]);
}
workgroupBarrier();
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
${s}
setOutputAtIndex(index, binaryOperation(a, b));
}
}
`}else e=`
${o}
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let a = getAByOutputIndex(index);
let b = getBByOutputIndex(index);
setOutputAtIndex(index, binaryOperation(a, b));
}
}
2023-01-06 19:23:06 +01:00
`;return e}};function Dt(r){let{inputs:e}=r,{x:t}=e;return r.backend.incRef(t.dataId),{dataId:t.dataId,shape:t.shape,dtype:t.dtype}}var H3={kernelName:mo,backendName:"webgpu",kernelFunc:Dt};function po(r){let{inputs:e,backend:t}=r,{real:o,imag:n}=e,s=t.makeTensorInfo(o.shape,"complex64"),a=t.tensorMap.get(s.dataId),i=Dt({inputs:{x:o},backend:t}),p=Dt({inputs:{x:n},backend:t});return a.complexTensorInfos={real:i,imag:p},s}var K3={kernelName:ri,backendName:"webgpu",kernelFunc:po};var Hr=class{constructor(e,t,o=""){this.variableNames=["A"],this.size=!0;let n=128;this.workgroupSize=[n,1,1],this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.op=t,o!==""&&(this.uniforms=o),this.shaderKey=`unary_${t}`}getUserCode(){return`
2022-11-18 17:13:29 +01:00
fn unaryOperation(a : f32) -> f32 {
2023-01-06 19:23:06 +01:00
${ja(this.op,!1)}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let a = getAByOutputIndex(index);
setOutputAtIndex(index, unaryOperation(a));
}
}
2023-01-06 19:23:06 +01:00
`}};function xe({opType:r,cpuKernelImpl:e,dtype:t}){return({inputs:o,backend:n})=>{let{x:s}=o,a=n,i=t||s.dtype;if(a.shouldExecuteOnCPU([s])&&e!=null){let u=a.tensorMap.get(s.dataId),c=e(u.values,i);return a.makeTensorInfo(s.shape,i,c)}let p=new Hr(s.shape,r);return a.runWebGPUProgram(p,[s],i)}}function Ze({opType:r,cpuKernelImpl:e,supportsComplex:t=!1,dtype:o}){return({inputs:n,backend:s})=>{let{a,b:i}=n,p=s;if(t&&a.dtype==="complex64"){let l=p.tensorMap.get(a.dataId),m=p.tensorMap.get(i.dataId),d,f;if(r!==ge.MUL)[d,f]=[[l.complexTensorInfos.real,m.complexTensorInfos.real],[l.complexTensorInfos.imag,m.complexTensorInfos.imag]].map(g=>{let[x,b]=g,C={dataId:x.dataId,dtype:x.dtype,shape:a.shape},w={dataId:b.dataId,dtype:b.dtype,shape:i.shape},k=new Yu(r,a.shape,i.shape);return p.runWebGPUProgram(k,[C,w],dt(x.dtype,b.dtype))});else{let g=new Ul(ge.COMPLEX_MULTIPLY_REAL,a.shape,i.shape),x=new Ul(ge.COMPLEX_MULTIPLY_IMAG,a.shape,i.shape),b=[{dataId:l.complexTensorInfos.real.dataId,dtype:l.complexTensorInfos.real.dtype,shape:a.shape},{dataId:l.complexTensorInfos.imag.dataId,dtype:l.complexTensorInfos.imag.dtype,shape:a.shape},{dataId:m.complexTensorInfos.real.dataId,dtype:m.complexTensorInfos.real.dtype,shape:i.shape},{dataId:m.complexTensorInfos.imag.dataId,dtype:m.complexTensorInfos.imag.dtype,shape:i.shape}];d=p.runWebGPUProgram(g,b,"float32"),f=p.runWebGPUProgram(x,b,"float32")}let h=po({inputs:{real:d,imag:f},backend:p});return p.disposeData(d.dataId),p.disposeData(f.dataId),h}let u=o||dt(a.dtype,i.dtype);if((a.dtype==="string"||i.dtype==="string"||p.shouldExecuteOnCPU([a,i]))&&e!=null){let l=p.tensorMap.get(a.dataId).values,m=p.tensorMap.get(i.dataId).values,d=a.dtype==="string"?S.fromUint8ToStringArray(l):l,f=a.dtype==="string"?S.fromUint8ToStringArray(m):m,[h,g]=e(a.shape,i.shape,d,f,u);return p.makeTensorInfo(g,u,h)}let c=new Yu(r,a.shape,i.shape);return 
p.runWebGPUProgram(c,[a,i],u)}}var{addImpl:q3,castImpl:j3,ceilImpl:X3,concatImpl:Y3,equalImpl:Q3,expImpl:Z3,expm1Impl:J3,floorImpl:eM,gatherNdImpl:tM,gatherV2Impl:rM,greaterEqualImpl:oM,greaterImpl:nM,lessEqualImpl:sM,lessImpl:aM,logImpl:iM,maxImpl:uM,maximumImpl:pM,minimumImpl:cM,multiplyImpl:lM,negImpl:mM,notEqualImpl:dM,prodImpl:fM,rangeImpl:hM,rsqrtImpl:gM,scatterImpl:xM,simpleAbsImpl:yM,sliceImpl:bM,stridedSliceImpl:CM,stringNGramsImpl:SM,subImpl:wM,tileImpl:IM,topKImpl:vM,transposeImpl:kM,uniqueImpl:iTt}=Zp;var Bre=xe({opType:X.ABS,cpuKernelImpl:yM}),NM={kernelName:ys,backendName:"webgpu",kernelFunc:Bre};var Vre=xe({opType:X.ACOS}),TM={kernelName:aa,backendName:"webgpu",kernelFunc:Vre};var zre=xe({opType:X.ACOSH}),_M={kernelName:ia,backendName:"webgpu",kernelFunc:zre};var Wre=Ze({opType:ge.ADD,cpuKernelImpl:q3,supportsComplex:!0}),$M={kernelName:eo,backendName:"webgpu",kernelFunc:Wre};var Pg=class{constructor(e){this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e[0],this.variableNames=e.map((t,o)=>`T${o}`),this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.shaderKey="addN"}getUserCode(){let e=[];this.variableNames.forEach(n=>{e.push(`let v${n} = get${n}ByOutputCoords(coords);`)});let t=this.variableNames.map(n=>`v${n}`).join(" + ");return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
for (var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if (flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
${e.join(`
`)}
setOutputAtIndex(flatIndex, ${t});
}
}
}
2023-01-06 19:23:06 +01:00
`}};function Ure(r){let{inputs:e,backend:t}=r,o=e;if(o.length===1)return Dt({inputs:{x:o[0]},backend:t});let n=o.map(i=>i.dtype).reduce((i,p)=>dt(i,p)),s=o.map(i=>i.shape),a=new Pg(s);return t.runWebGPUProgram(a,o,n)}var EM={kernelName:Po,backendName:"webgpu",kernelFunc:Ure};var Mg=class{constructor(e,t){this.variableNames=["A"],this.workgroupSize=[16,16,1];let o=new Array(e.length);for(let n=0;n<o.length;n++)o[n]=e[t[n]];this.outputShape=o,this.dispatchLayout={x:[0],y:[1]},this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[1,1,1]),this.shaderKey="transposeShared"}getUserCode(){y.assert(this.workgroupSize[0]===this.workgroupSize[1],()=>`Must be a square tile, current tile shape is ${this.workgroupSize[0]} x ${this.workgroupSize[1]}`);let e=this.workgroupSize[0];return`
2022-11-20 22:20:02 +01:00
var<workgroup> tile : array<array<f32, ${this.workgroupSize[0]+1}>, ${this.workgroupSize[0]}>;
2023-01-06 19:23:06 +01:00
${Q()} {
var x = i32(workgroupId.x) * ${e} + i32(localId.x);
var y = i32(workgroupId.y) * ${e} + i32(localId.y);
2022-11-20 22:20:02 +01:00
let width = uniforms.outShape[0];
let height = uniforms.outShape[1];
if (x < width && y < height) {
tile[localId.y][localId.x] = f32(A[y * width + x]);
}
workgroupBarrier();
2023-01-06 19:23:06 +01:00
x = i32(workgroupId.y) * ${e} + i32(localId.x);
y = i32(workgroupId.x) * ${e} + i32(localId.y);
2022-11-20 22:20:02 +01:00
if (x < height && y < width) {
setOutputAtIndex((y * height + x), tile[localId.x]
[localId.y]);
}
}
2023-01-06 19:23:06 +01:00
`}};var Lg=class{constructor(e,t){this.variableNames=["A"],this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0;let o=new Array(e.length);for(let n=0;n<o.length;n++)o[n]=e[t[n]];this.outputShape=o,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.newDim=t,this.shaderKey=`transpose_${t}`}getUserCode(){let e=Rt(this.outputShape.length),t=Gre(this.newDim);return`
${Q("index")} {
2022-11-20 22:20:02 +01:00
for(var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if(flatIndex < uniforms.size) {
let resRC = getCoordsFromIndex(flatIndex);
setOutputAtIndex(flatIndex, A[getIndexFromCoords${this.outputShape.length}D(
${e}(${t}), uniforms.aShape)]);
}
}
}
2023-01-06 19:23:06 +01:00
`}};function Gre(r){let e=r.length;if(e>6)throw Error(`Transpose for rank ${e} is not yet supported`);let t=new Array(e);for(let o=0;o<r.length;o++)t[r[o]]=`resRC.${Ao(o)}`;return t.join()}function Nr(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{perm:s}=o,a=t,i=n.shape.length,p=new Array(i);for(let c=0;c<p.length;c++)p[c]=n.shape[s[c]];if(t.shouldExecuteOnCPU([n])){let l=a.tensorMap.get(n.dataId).values,m=kM(l,n.shape,n.dtype,s,p);return t.makeTensorInfo(p,n.dtype,m)}if(n.shape.length===2&&y.arraysEqual(s,[1,0])){let c=new Mg(n.shape,s);return a.runWebGPUProgram(c,[n],n.dtype)}let u=new Lg(n.shape,s);return a.runWebGPUProgram(u,[n],n.dtype)}var AM={kernelName:ro,backendName:"webgpu",kernelFunc:Nr};var Bg=class{constructor(e,t){this.workgroupSize=[64,1,1],this.variableNames=["x"],this.uniforms="reduceSize : i32,",this.size=!0,this.inputShape=[e.batchSize,e.inSize];let[o]=S.computeOutAndReduceShapes(this.inputShape,[1]);this.outputShape=o.length===0?[1]:o,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,[1,1,1]),this.reduceType=t,this.shaderKey=`reduce_${t}`}getUserCode(){let e="",t="0.0",o=this.workgroupSize[0];this.reduceType==="min"||this.reduceType==="max"?(e=`
2022-11-20 22:20:02 +01:00
if (isnan(candidate)) {
bestValue = uniforms.NAN;
} else if (!isnan(bestValue) && candidate ${this.reduceType==="min"?"<":">"} bestValue)
2023-01-06 19:23:06 +01:00
{ bestValue = candidate; }`,t="f32(x[offset])"):this.reduceType==="sum"||this.reduceType==="mean"?e=" bestValue = bestValue + candidate; ":this.reduceType==="prod"?(e=" bestValue = bestValue * candidate; ",t="1.0"):this.reduceType==="all"?(e=" bestValue = f32(bestValue >= 1.0 && candidate >= 1.0); ",t="1.0"):this.reduceType==="any"&&(e=" bestValue = f32(bestValue >= 1.0 || candidate >= 1.0); ",t="0.0");let n=this.reduceType==="mean"?"setOutputAtIndex(outputIndex, bestValue / f32(uniforms.reduceSize));":"setOutputAtIndex(outputIndex, bestValue);";return`
2022-11-20 22:20:02 +01:00
fn DIV_CEIL(a : u32, b : u32) -> u32 {
return ((a - 1u) / b + 1u);
}
${`
2023-01-06 19:23:06 +01:00
var<workgroup> xBestValues : array<f32, ${o}>;
2022-11-20 22:20:02 +01:00
`}
fn getOffset(outputIndex : i32) -> i32 {
let outputCoords = getCoordsFromIndex(outputIndex);
let offset = ${this.outputShape.length===1?"outputCoords":"outputCoords[0]"} * uniforms.reduceSize;
return offset;
}
2023-01-06 19:23:06 +01:00
${Q("index")} {
let outputIndex = index / ${o};
2022-11-20 22:20:02 +01:00
let offset = getOffset(outputIndex);
var bestValue = ${t};
let Length = uniforms.reduceSize;
2023-01-06 19:23:06 +01:00
let WorkPerThread = DIV_CEIL(u32(Length), ${o}u);
2022-11-20 22:20:02 +01:00
for (var k = i32(localId.x); k < Length && outputIndex < uniforms.size;
2023-01-06 19:23:06 +01:00
k = k + ${o}) {
2022-11-20 22:20:02 +01:00
let candidate = f32(x[offset + k]);
${e}
}
xBestValues[localId.x] = bestValue;
workgroupBarrier();
2023-01-06 19:23:06 +01:00
var reduceSize = min(u32(Length), ${o}u);
2022-11-20 22:20:02 +01:00
for (var currentSize = reduceSize / 2u; reduceSize > 1u;
currentSize = reduceSize / 2u) {
let interval = DIV_CEIL(reduceSize, 2u);
if (localId.x < currentSize) {
let candidate = xBestValues[localId.x + interval];
${e}
xBestValues[localId.x] = bestValue;
}
reduceSize = interval;
workgroupBarrier();
}
if (localId.x == 0u && outputIndex < uniforms.size) {
2023-01-06 19:23:06 +01:00
${n}
2022-11-20 22:20:02 +01:00
}
}
2023-01-06 19:23:06 +01:00
`}};
/**
 * Shared reduction driver.
 *
 * @param r Input tensor info.
 * @param e Axis (or axes) to reduce, normalised with y.parseAxisParam.
 * @param t keepDims flag; when truthy the result shape keeps reduced dims.
 * @param o Reduce type string (e.g. "max", "prod", "mean", "all", "any").
 * @param n Backend.
 *
 * If the axes are not innermost the input is transposed first (the transposed
 * copy is tracked in `a` and disposed at the end). "max" and "prod" may run on
 * the CPU when the backend requests it; everything else runs the Bg program on
 * a [batchSize, reduceSize] view and reshapes the result.
 * NOTE(review): the CPU "prod" path builds its result from the shape returned
 * by fM rather than from d - confirm keepDims is honoured there.
 */
function Kr(r,e,t,o,n){let s=r.shape.length,a=[],i=y.parseAxisParam(e,r.shape),p=i,u=S.getAxesPermutation(p,s),c=r;u!=null&&(c=Nr({inputs:{x:r},attrs:{perm:u},backend:n}),p=S.getInnerMostAxes(p.length,s),a.push(c)),S.assertAxesAreInnerMostDims(o,p,s);let[l,m]=S.computeOutAndReduceShapes(c.shape,p),d=l;t&&(d=S.expandShapeToKeepDim(l,i));let f;if((o==="max"||o==="prod")&&n.shouldExecuteOnCPU([c])){let h=n.tensorMap.get(c.dataId).values;switch(o){case"max":let g=uM(h,y.sizeFromShape(m),d,r.dtype);f=n.makeTensorInfo(d,r.dtype,g);break;case"prod":let{outVals:x,outShape:b,outDtype:C}=fM(c.shape,c.dtype,h,p);f=n.makeTensorInfo(b,C,x);break;default:throw new Error(`${o} CPU implementation is not yet supported.`)}}else{let h=y.sizeFromShape(m),x=y.sizeFromShape(c.shape)/h,b={windowSize:h,inSize:h,batchSize:x,outSize:1},C=o==="mean"?"float32":Ta(r.dtype),w=[{type:"int32",data:[h]}],k=new Bg(b,o),_=n.runWebGPUProgram(k,[c],C,w);a.push(_),f=me({inputs:{x:_},attrs:{shape:d},backend:n})}return a.forEach(h=>n.disposeData(h.dataId)),f}
/** "All" kernel: delegates to Kr with reduce type "all". */
function Hre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{keepDims:s,axis:a}=o;return Kr(n,a,s,"all",t)}
/** Kernel registration entry for Hre. */
var RM={kernelName:Mo,backendName:"webgpu",kernelFunc:Hre};
/** "Any" kernel: delegates to Kr with reduce type "any". */
function Kre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{keepDims:s,axis:a}=o;return Kr(n,a,s,"any",t)}
/** Kernel registration entry for Kre. */
var DM={kernelName:Lo,backendName:"webgpu",kernelFunc:Kre};
// Arg-min/arg-max program; the class body continues on the following lines.
var Nc=class{constructor(e,t,o){this.workgroupSize=[64,1,1],this.variableNames=["x"],this.uniforms="infinityValue : f32,",this.size=!0;let n=[t];this.op=o==="min"?"<":">";let[s,a]=S.computeOutAndReduceShapes(e,n);this.outputShape=s.length===0?[1]:s,this.dispatchLayout=ae(this.outputShape),y.sizeFromShape(a)<32||y.sizeFromShape(s)>1e3?(this.type="plain",this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize)):(this.type="shared",this.dispatch=Y(this.dispatchLayout,this.outputShape,[1,1,1])),this.inputShape=e,this.shaderKey=`argMinMax_${this.op}_${this.type}`}getUserCode(){let 
e=this.workgroupSize[0],t=()=>this.inputShape.length===1?"uniforms.xShape":`uniforms.xShape.${Ao(this.inputShape.length-1)}`,o=()=>{let n="";if(this.outputShape.length===1)this.inputShape.length!==1&&(n+="outputCoords,");else for(let s=0;s<this.outputShape.length;s++)n+=`outputCoords.${Ao(s)},`;return n};return this.type==="shared"?`
2022-11-18 17:13:29 +01:00
fn DIV_CEIL(a : u32, b : u32) -> u32 {
return ((a - 1u) / b + 1u);
}
${`
2023-01-06 19:23:06 +01:00
var<workgroup> xBestIndices : array<i32, ${e}>;
var<workgroup> xBestValues : array<f32, ${e}>;
2022-11-18 17:13:29 +01:00
`}
2023-01-06 19:23:06 +01:00
${Q("index")} {
let outputIndex = index / ${e};
let reduceLength = ${t()};
2022-11-18 17:13:29 +01:00
var bestIndex = i32(localId.x);
var bestValue = uniforms.infinityValue;
let outputCoords = getCoordsFromIndex(outputIndex);
for (var k = i32(localId.x); k < reduceLength && outputIndex < uniforms.size;
2023-01-06 19:23:06 +01:00
k = k + ${e}) {
let candidate = getX(${o()} k);
2022-11-18 17:13:29 +01:00
if (!isnan(candidate) && candidate ${this.op} bestValue) {
bestValue = candidate;
bestIndex = k;
}
}
xBestValues[localId.x] = bestValue;
xBestIndices[localId.x] = bestIndex;
workgroupBarrier();
2023-01-06 19:23:06 +01:00
var reduceSize = min(u32(reduceLength), ${e}u);
2022-11-18 17:13:29 +01:00
for (var currentSize = reduceSize / 2u; reduceSize > 1u;
currentSize = reduceSize / 2u) {
let interval = DIV_CEIL(reduceSize, 2u);
if (localId.x < currentSize) {
let candidate = xBestValues[localId.x + interval];
if (candidate ${this.op} bestValue) {
bestValue = candidate;
xBestValues[localId.x] = bestValue;
xBestIndices[localId.x] = xBestIndices[localId.x + interval];
}
}
reduceSize = interval;
workgroupBarrier();
}
if (localId.x == 0u && outputIndex < uniforms.size) {
setOutputAtIndexI32(outputIndex, xBestIndices[localId.x]);
}
}
`:`
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let outputCoords = getCoordsFromIndex(index);
var bestIndex = 0;
2023-01-06 19:23:06 +01:00
var bestValue = getX(${o()} 0);
let reduceLength = ${t()};
2022-11-18 17:13:29 +01:00
for (var i = 1; i < reduceLength; i++) {
2023-01-06 19:23:06 +01:00
let candidate = getX(${o()} i);
2022-11-18 17:13:29 +01:00
if (candidate ${this.op} bestValue) {
bestValue = candidate;
bestIndex = i;
}
}
setOutputAtIndexI32(index, bestIndex);
}
}
2023-01-06 19:23:06 +01:00
`}};function qre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=S.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=Nr({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=S.getInnerMostAxes(a.length,p.shape.length)),S.assertAxesAreInnerMostDims("argMax",[a[0]],p.shape.length);let c=new Nc(p.shape,a[0],"max"),l=[{type:"float32",data:[Number.NEGATIVE_INFINITY]}],m=t.runWebGPUProgram(c,[p],"int32",l);return u.forEach(d=>t.disposeData(d.dataId)),m}var FM={kernelName:Bo,backendName:"webgpu",kernelFunc:qre};function jre(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s}=o,a=y.parseAxisParam(s,n.shape),i=S.getAxesPermutation(a,n.shape.length),p=n,u=[];i!=null&&(p=Nr({inputs:{x:n},backend:t,attrs:{perm:i}}),u.push(p),a=S.getInnerMostAxes(a.length,p.shape.length)),S.assertAxesAreInnerMostDims("argMin",[a[0]],p.shape.length);let c=new Nc(p.shape,a[0],"min"),l=[{type:"float32",data:[Number.POSITIVE_INFINITY]}],m=t.runWebGPUProgram(c,[p],"int32",l);return u.forEach(d=>t.disposeData(d.dataId)),m}var OM={kernelName:ei,backendName:"webgpu",kernelFunc:jre};var Xre=xe({opType:X.ASIN}),PM={kernelName:ua,backendName:"webgpu",kernelFunc:Xre};var Yre=xe({opType:X.ASINH}),MM={kernelName:pa,backendName:"webgpu",kernelFunc:Yre};var Qre=xe({opType:X.ATAN}),LM={kernelName:ca,backendName:"webgpu",kernelFunc:Qre};var Zre=Ze({opType:ge.ATAN2}),BM={kernelName:ma,backendName:"webgpu",kernelFunc:Zre};var Jre=xe({opType:X.ATANH}),VM={kernelName:la,backendName:"webgpu",kernelFunc:Jre};var Gl=class{constructor(e,t){this.variableNames=["x"],this.uniforms="stride : vec2<i32>, pad : vec2<i32>, dilation : vec2<i32>, convDims : vec2<i32>, filterDims : vec2<i32>,",this.workgroupSize=[128,1,1],this.size=!0,this.outputShape=e.outShape,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey=`pool2D_${t}`,this.poolType=t}getUserCode(){let e="resultValue = max(value, 
resultValue);";this.poolType==="avg"&&(e="resultValue = resultValue + value; count = count + 1.0;");let t="resultValue";return this.poolType==="avg"&&(t="resultValue / max(count, 1.0)"),`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let xRCCorner = vec2<i32>(coords.yz) * uniforms.stride - uniforms.pad;
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
var resultValue = ${this.poolType==="avg"?"0.0":"-1.0 / pow(10.0, -20.0)"};
var count = 0.0;
for (var wR = 0; wR < uniforms.filterDims.x; wR = wR + uniforms.dilation.x) {
let xR = xRCorner + wR;
if (xR < 0 || xR >= uniforms.convDims.x) {
continue;
}
for (var wC = 0; wC < uniforms.filterDims.y; wC = wC + uniforms.dilation.y) {
let xC = xCCorner + wC;
if (xC < 0 || xC >= uniforms.convDims.y) {
continue;
}
let value = getX(batch, xR, xC, coords[3]);
${e}
}
}
setOutputAtIndex(index, ${t});
}
}
2023-01-06 19:23:06 +01:00
/**
 * WebGPU pooling program for the case where the pooling filter is 1x1:
 * every output element is simply the input element found at the strided
 * row/column position (no max/avg accumulation is needed).
 *
 * constructor(e)
 *   e - pooling info object; only `e.outShape` is read here.
 *
 * Uniforms: `stride : vec2<i32>` supplied by the caller.
 *
 * Fix: stray VCS timestamp lines that had ended up inside the WGSL template
 * (and would not compile as WGSL) have been removed.
 */
var Vg = class {
  constructor(e) {
    this.variableNames = ["x"];
    this.uniforms = "stride : vec2<i32>,";
    this.workgroupSize = [256, 1, 1];
    this.size = true;
    this.outputShape = e.outShape;
    this.dispatchLayout = ae(this.outputShape);
    this.dispatch = Y(this.dispatchLayout, this.outputShape, this.workgroupSize);
    this.shaderKey = "poolWithFilterSizeEqualsOne";
  }

  /** @returns {string} WGSL source of the kernel entry point. */
  getUserCode() {
    return `
      ${Q("index")} {
        if (index < uniforms.size) {
          let coords = getCoordsFromIndex(index);
          let batch = coords[0];
          let d = coords[3];
          let xRCCorner = coords.yz * uniforms.stride;
          let xRCorner = xRCCorner.x;
          let xCCorner = xRCCorner.y;
          let value = getX(batch, xRCorner, xCCorner, d);
          setOutputAtIndex(index, value);
        }
      }
    `;
  }
};
outHeight : i32, outWidth : i32, avgMultiplier : f32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e.inShape,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="avg_pool2d_backprop"}getUserCode(){return`
${Q("index")} {
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d = coords[3];
let dyRCCorner = vec2<i32>(coords.yz) - uniforms.pads;
let dyRCorner = dyRCCorner.x;
let dyCCorner = dyRCCorner.y;
// Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
for (var wR = 0; wR < uniforms.filterDims[0]; wR = wR + uniforms.dilation[0]) {
let dyR = f32(dyRCorner + wR) / f32(uniforms.stride[0]);
if (dyR < 0.0 || dyR >= f32(uniforms.outHeight) || fract(dyR) > 0.0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims[1]; wC = wC + uniforms.dilation[1]) {
let dyC = f32(dyCCorner + wC) / f32(uniforms.stride[1]);
if (dyC < 0.0 || dyC >= f32(uniforms.outWidth) || fract(dyC) > 0.0) {
continue;
}
let idyC = i32(dyC);
let dyValue = getDy(batch, idyR, idyC, d);
dotProd = dotProd + dyValue * uniforms.avgMultiplier;
}
}
setOutputAtIndex(index, dotProd);
}
}
/**
 * WebGPU program that copies a rectangular slice out of `source`.
 *
 * constructor(e, t)
 *   e - start offsets of the slice, one entry per dimension; also uploaded
 *       by the caller as the `start` uniform.
 *   t - shape of the slice, which is the output shape.
 *
 * Fix: stray VCS timestamp lines that had ended up inside the WGSL template
 * (and would not compile as WGSL) have been removed.
 */
var Ug = class {
  constructor(e, t) {
    this.variableNames = ["source"];
    this.workPerThread = 1;
    this.workgroupSize = [64, 1, 1];
    this.size = true;
    this.outputShape = t;
    this.rank = t.length;
    this.dispatchLayout = ae(this.outputShape);
    this.dispatch = Y(
      this.dispatchLayout,
      this.outputShape,
      this.workgroupSize,
      [this.workPerThread, 1, 1]
    );
    this.start = e;
    this.uniforms = `start : ${Rt(e.length)}, `;
    this.shaderKey = "slice";
  }

  /** @returns {string} WGSL source of the kernel entry point. */
  getUserCode() {
    const coordType = Rt(this.rank);
    const sourceArgs = ooe(this.rank);

    // One assignment per output dimension mapping output coords to source coords.
    let assignments;
    if (this.start.length === 1) {
      assignments = this.outputShape.map(() => "sourceLoc = uniforms.start + coords;");
    } else {
      assignments = this.outputShape.map(
        (_, dim) => `sourceLoc.${bI[dim]} = uniforms.start.${Ao(dim)} + coords.${bI[dim]};`
      );
    }

    return `
      ${Q("index")} {
        if (index < uniforms.size) {
          var sourceLoc : ${coordType};
          let coords = getCoordsFromIndex(index);
          ${assignments.join("\n")}
          setOutputAtIndex(index, getSource(${sourceArgs}));
        }
      }
    `;
  }
};

/** Component names used to address up to six coordinates of `sourceLoc`. */
var bI = ["x", "y", "z", "w", "u", "v"];

/**
 * Builds the argument list passed to getSource(...) for a given rank.
 *
 * @param {number} r rank of the sliced tensor.
 * @returns {string} "sourceLoc" for rank 1, otherwise a comma-separated list
 *     of `sourceLoc.<component>` for ranks up to 6.
 * @throws {Error} when the rank is greater than 6.
 */
function ooe(r) {
  if (r === 1) {
    return "sourceLoc";
  }
  if (r <= 6) {
    return bI
      .slice(0, r)
      .map((component) => `sourceLoc.${component}`)
      .join(",");
  }
  throw Error(`Slicing for rank ${r} is not yet supported`);
}
2022-11-20 22:20:02 +01:00
fn bincount_write(index: i32, value: f32) {
2023-01-06 19:23:06 +01:00
${Ic("&result[index]","value","float32")}
2022-11-20 22:20:02 +01:00
}
2023-01-06 19:23:06 +01:00
`,aoe=`
2022-11-20 22:20:02 +01:00
fn bincount_write(index: i32, value: f32) {
2023-01-06 19:23:06 +01:00
atomicStore(&result[index], bitcast<i32>(value));
2022-11-20 22:20:02 +01:00
}
2023-01-06 19:23:06 +01:00
/**
 * WebGPU program for bincount / dense bincount.
 *
 * constructor(e, t, o = false)
 *   e - shape used for both dispatch and rank selection (rank 1 or 2).
 *   t - whether a weights input `w` is bound in addition to `x`.
 *   o - binary output mode: bins are set to 1 rather than accumulated, and
 *       the non-atomic write snippet (`aoe`) is used instead of `soe`.
 *
 * Fixes:
 *   - Stray VCS timestamp lines inside the WGSL template (invalid WGSL) are
 *     removed.
 *   - In binary output mode the shader used to contain `let value = 1;`.
 *     WGSL infers i32 for that declaration, but `bincount_write` takes an
 *     f32 `value` and WGSL has no implicit i32 -> f32 conversion. The literal
 *     is now emitted as `1.`, matching the non-binary branch.
 */
var Tc = class {
  constructor(e, t, o = false) {
    this.outputShape = [];
    this.variableNames = ["x"];
    this.uniforms = "binCountSize : i32,";
    this.workgroupSize = [64, 1, 1];
    this.atomic = true;
    this.hasWeights = true;
    this.binaryOutput = false;

    this.outputShape = e;
    this.rank = e.length;
    this.dispatchLayout = ae(this.outputShape);
    this.dispatch = Y(this.dispatchLayout, this.outputShape, this.workgroupSize);

    this.binaryOutput = o;
    if (o) {
      // Binary output only stores a constant, so no atomic accumulation.
      this.atomic = false;
    }

    this.hasWeights = t;
    if (this.hasWeights) {
      this.variableNames.push("w");
    }
    this.shaderKey = `bincount_${this.hasWeights}_${this.binaryOutput}_${this.rank}`;
  }

  /** @returns {string} WGSL source: write helper plus the kernel entry point. */
  getUserCode() {
    const writeSnippet = this.binaryOutput ? aoe : soe;
    const mainHeader = Q("index");

    // WGSL expression for the amount written into a bin.
    const valueExpr = (weightRead) => {
      if (this.binaryOutput) {
        return "1.";
      }
      return this.hasWeights ? weightRead : "1.";
    };

    const body =
      this.rank === 1
        ? `if (index < uniforms.xShape) {
          let indexVal = i32(getX(index));
          if (indexVal < uniforms.binCountSize) {
            let value = ${valueExpr("getW(index)")};
            bincount_write(indexVal, value);
          }
        }`
        : `let coord = getCoordsFromIndex(index);
        if (coordsInBounds2D(coord, uniforms.xShape)) {
          let indexVal = i32(getX(coord[0], coord[1]));
          if (indexVal < uniforms.binCountSize) {
            let value = ${valueExpr("getW(coord[0], coord[1])")};
            bincount_write(coord.x * uniforms.binCountSize + indexVal, value);
          }
        }`;

    return `
      ${writeSnippet}
      ${mainHeader} {
        ${body}
      }
    `;
  }
};
f32, maxVal : f32,",this.workPerThread=4,this.workgroupSize=[64,1,1],this.isVec4=!0,this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]),this.shaderKey="clipVec4"}getUserCode(){return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if(index < uniforms.size) {
let value = getAByOutputIndex(index);
2023-01-06 19:23:06 +01:00
var clampedValue = clamp(
value, vec4<f32>(uniforms.minVal), vec4<f32>(uniforms.maxVal));
clampedValue = select(clampedValue, value, isnanVec4(value));
2022-11-18 17:13:29 +01:00
setOutputAtIndex(index, clampedValue);
}
}
2023-01-06 19:23:06 +01:00
/**
 * WebGPU program that clamps every element of `A` to [minVal, maxVal].
 * NaN inputs are written through unchanged instead of being clamped.
 *
 * constructor(e)
 *   e - output shape (same as the input shape).
 *
 * Uniforms: `minVal : f32, maxVal : f32` supplied by the caller.
 *
 * Fix: stray VCS timestamp lines that had ended up inside the WGSL template
 * (and would not compile as WGSL) have been removed.
 */
var Hg = class {
  constructor(e) {
    this.variableNames = ["A"];
    this.uniforms = "minVal : f32, maxVal : f32,";
    this.workgroupSize = [64, 1, 1];
    this.size = true;
    this.outputShape = e;
    this.dispatchLayout = ae(this.outputShape);
    this.dispatch = Y(this.dispatchLayout, this.outputShape, this.workgroupSize);
    this.shaderKey = "clip";
  }

  /** @returns {string} WGSL source of the kernel entry point. */
  getUserCode() {
    return `
      ${Q("index")} {
        if(index < uniforms.size) {
          let value = getAByOutputIndex(index);
          if (isnan(value)) {
            setOutputAtIndex(index, value);
            return;
          }
          setOutputAtIndex(index, clamp(value, uniforms.minVal, uniforms.maxVal));
        }
      }
    `;
  }
};
${Q("index")} {
2022-11-18 17:13:29 +01:00
for(var i = 0; i < ${this.workPerThread}; i = i + 1) {
let flatIndex = index * ${this.workPerThread} + i;
if(flatIndex < uniforms.size) {
let coords = getCoordsFromIndex(flatIndex);
let yR = coords.x;
let yC = coords.y;
${e.join(`
`)}
}
}
}
2023-01-06 19:23:06 +01:00
/**
 * Prepares concat inputs for the 2D concat shader.
 *
 * Every input is reshaped to [product of dims before axis, product of dims
 * from axis onward]; the shape of the final concatenated result is computed
 * from the original shapes.
 *
 * @param r tensors to concatenate (each exposes `.shape`).
 * @param e concat axis.
 * @param t backend forwarded to the reshape kernel.
 * @returns {{tensors2D: Array, outShape: number[]}}
 */
function coe(r, e, t) {
  const originalShapes = r.map((tensor) => tensor.shape);
  const outShape = S.computeOutShape(originalShapes, e);

  const tensors2D = [];
  for (const tensor of r) {
    const outerSize = y.sizeFromShape(tensor.shape.slice(0, e));
    const innerSize = y.sizeFromShape(tensor.shape.slice(e));
    tensors2D.push(
      me({
        inputs: { x: tensor },
        backend: t,
        attrs: { shape: [outerSize, innerSize] },
      })
    );
  }

  return { tensors2D, outShape };
}
/**
 * Generates the WGSL helper functions (`mm_readA`, `mm_readB`, `mm_write`)
 * that let a tiled matrix-multiply shader compute a 2D convolution: one
 * matmul operand is read from the filter `W`, the other is gathered on the
 * fly from the input `x` (with zero padding outside the image), and results
 * are written back at 4D output coordinates.
 *
 * @param r  true when the data format is channels-last.
 * @param e  fitAOuter - skip bounds checks along the A-outer dimension.
 * @param t  fitBOuter - skip bounds checks along the B-outer dimension.
 * @param o  fitInner  - skip bounds checks along the inner dimension.
 * @param n  whether a bias is added (forwarded to Gr).
 * @param s  activation (forwarded to pr and Gr).
 * @param a  whether prelu activation weights are bound (forwarded to pr).
 * @param i  element width used when reading `x` (1, 3 or 4).
 * @param p  element width used when reading `W` (1 or 4).
 * @param u  element width of the value written by mm_write.
 * @returns {string} WGSL source.
 * @throws {Error} when `i` or `p` is not one of the supported widths.
 *
 * Fix: stray VCS timestamp lines that had ended up inside the WGSL templates
 * (and would not compile as WGSL) have been removed.
 */
function loe(r, e, t, o, n = false, s = null, a = false, i = 4, p = 4, u = 4) {
  // WGSL statement loading `resData` from x for a given element width.
  const readXSnippet = (innerElementSize) => {
    switch (innerElementSize) {
      case 1:
        return "resData = x[xIndex];";
      case 3:
        return "resData = vec3<f32>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);";
      case 4:
        return "resData = x[xIndex / 4];";
      default:
        throw new Error(`innerElementSize ${innerElementSize} is not supported.`);
    }
  };

  // WGSL statement returning one element of W for a given element width.
  const readWSnippet = (innerElementSize) => {
    switch (innerElementSize) {
      case 1:
        return "return W[row * uniforms.wShape[3] + colIn];";
      case 4:
        return "return W[row * uniforms.wShape[3] / 4 + colIn];";
      default:
        throw new Error(`innerElementSize ${innerElementSize} is not supported.`);
    }
  };

  // 4D coordinate of the input element, ordered for the data format.
  const inputCoordSnippet = r
    ? `
      let coord = vec4<i32>(batch, xRow, xCol, xCh);
      `
    : `
      let coord = vec4<i32>(batch, xCh, xRow, xCol);
      `;

  // 4D coordinate of the output element, ordered for the data format.
  const outputCoordSnippet = r
    ? `
      let coords = vec4<i32>(
          batch,
          row / outWidth,
          row % outWidth,
          col);
      `
    : `
      let coords = vec4<i32>(
          batch,
          row,
          col / outWidth,
          col % outWidth);
      `;

  const xHeight = r ? "uniforms.xShape[1]" : "uniforms.xShape[2]";
  const xWidth = r ? "uniforms.xShape[2]" : "uniforms.xShape[3]";
  const spatialIndex = r ? "row" : "col";
  const filterIndex = r ? "col" : "row";

  const readXBody = `
      let inChannels = uniforms.wShape[2];
      let outWidth = ${r ? "uniforms.outShape[2]" : "uniforms.outShape[3]"};
      let outRow = ${spatialIndex} / outWidth;
      let outCol = ${spatialIndex} % outWidth;
      let WRow = ${filterIndex} / (uniforms.filterDims[1] * inChannels);
      let WCol = ${filterIndex} / inChannels % uniforms.filterDims[1];
      let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * WRow - uniforms.pad[0];
      let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * WCol - uniforms.pad[1];
      let xCh = ${filterIndex} % inChannels;
      var resData = ${kt(i)}(0.0);
      // The bounds checking is always needed since we use it to pad zero for
      // the 'same' padding type.
      if (xRow >= 0 && xRow < ${xHeight} && xCol >= 0 && xCol < ${xWidth}) {
        ${inputCoordSnippet}
        let xIndex = getIndexFromCoords4D(coord, uniforms.xShape);
        ${readXSnippet(i)}
      }
      return resData;`;

  // Body of whichever mm_read* function samples x. When the tile is known to
  // fit, the matrix bounds check is omitted.
  let sampleX;
  if (r) {
    sampleX =
      e && o
        ? `
      let col = colIn * ${i};
      ${readXBody}`
        : `
      let col = colIn * ${i};
      if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
        ${readXBody}
      }
      return ${kt(i)}(0.0);`;
  } else {
    sampleX =
      o && t
        ? `
      let col = colIn * ${i};
      ${readXBody}`
        : `
      let col = colIn * ${i};
      if (row < uniforms.dimInner && col < uniforms.dimBOuter) {
        ${readXBody}
      }
      return ${kt(i)}(0.0);`;
  }

  const sampleW = `${readWSnippet(p)}`;
  const resultType = kt(u);
  const aType = r ? kt(i) : kt(p);
  const bType = r ? kt(p) : kt(i);

  return `
      ${pr(s, a, u === 4, 4)}
      fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${aType} {
        ${r ? sampleX : sampleW}
      }

      fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${bType} {
        ${r ? sampleW : sampleX}
      }

      fn mm_write(batch: i32, row : i32, colIn : i32, valueIn : ${resultType}) {
        let col = colIn * ${u};
        if (row < uniforms.dimAOuter && col < uniforms.dimBOuter)
        {
          var value = valueIn;
          let outWidth = ${r ? "uniforms.outShape[2]" : "uniforms.outShape[3]"};
          ${outputCoordSnippet}
          ${Gr(n, s)}
          setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
        }
      }`;
}
e=this.isVec4?qu(this.elementsPerThread,this.workgroupSize,!this.isChannelsLast,this.tileInner):ju(this.elementsPerThread,this.workgroupSize,!this.isChannelsLast,this.tileInner,!1,null,this.sequentialAccessByThreads),t=this.isVec4?[this.innerElementSize,4,4]:[1,1,1];return`
${loe(this.isChannelsLast,this.fitAOuter,this.fitBOuter,this.fitInner,this.addBias,this.activation,this.hasPreluActivationWeights,t[0],t[1],t[2])}
2022-11-18 17:13:29 +01:00
${e}
2023-01-06 19:23:06 +01:00
/**
 * Naive (one output element per invocation) WebGPU conv2d program. Each
 * invocation loops over the filter window and all input channels and
 * accumulates the dot product; out-of-range reads return 0.0.
 *
 * constructor(e, t = false, o = null, n = false)
 *   e - convolution info; `e.outShape` and `e.dataFormat` are read here.
 *   t - whether a `bias` input is bound.
 *   o - activation (forwarded to pr / Gr when generating the shader).
 *   n - whether a `preluActivationWeights` input is bound.
 *
 * Uniforms: filterDims, pad, stride, dilation (all vec2<i32>).
 *
 * Fix: stray VCS timestamp lines that had ended up inside the WGSL template
 * (and would not compile as WGSL) have been removed.
 */
var jg = class {
  constructor(e, t = false, o = null, n = false) {
    this.variableNames = ["x", "W"];
    this.uniforms =
      "filterDims: vec2<i32>, pad: vec2<i32>, stride: vec2<i32>, dilation: vec2<i32>,";
    this.workgroupSize = [4, 4, 8];
    this.outputShape = e.outShape;
    this.isChannelsLast = e.dataFormat === "channelsLast";
    this.dispatchLayout = this.isChannelsLast
      ? { x: [2], y: [1], z: [0, 3] }
      : { x: [3], y: [2], z: [0, 1] };
    this.dispatch = Y(this.dispatchLayout, this.outputShape, this.workgroupSize);
    this.addBias = t;
    this.activation = o;
    this.hasPreluActivationWeights = n;
    if (t) {
      this.variableNames.push("bias");
    }
    if (n) {
      this.variableNames.push("preluActivationWeights");
    }
    this.shaderKey = `conv2dnaive_${this.activation}_${this.isChannelsLast}`;
  }

  /** @returns {string} WGSL source: read/write helpers plus the entry point. */
  getUserCode() {
    const channelsLast = this.isChannelsLast;
    return `
      ${pr(this.activation, this.hasPreluActivationWeights, false, 4)}
      fn readInp(batch : i32, row : i32, col : i32, chan : i32) -> f32{
        let coords = vec4<i32>(batch, row, col, chan);
        if (coordsInBounds4D(coords, uniforms.xShape)) {
          return getX(batch, row, col, chan);
        } else {
          return 0.0;
        }
      }
      fn readFilt(row : i32, col : i32, xChannel : i32, outChannel : i32) -> f32{
        let coords = vec4<i32>(row, col, xChannel, outChannel);
        if(coordsInBounds4D(coords, uniforms.wShape)) {
          return getW(row, col, xChannel, outChannel);
        } else {
          return 0.0;
        }
      }
      fn writeResult(batch : i32, row : i32, col : i32, chan : i32, valueIn : f32) {
        let coords = ${channelsLast ? "vec4<i32>(batch, row, col, chan);" : "vec4<i32>(batch, chan, row, col);"}
        if (coordsInBounds4D(coords, uniforms.outShape)) {
          var value = valueIn;
          ${Gr(this.addBias, this.activation)}
          setOutputAtCoords(coords.x, coords.y, coords.z, coords.w, value);
        }
      }
      ${Q("index")} {
        let coords = getOutputCoords();
        let batch = coords[0];
        let outChannel = ${channelsLast ? "coords[3];" : "coords[1];"}
        let outRow = ${channelsLast ? "coords[1];" : "coords[2];"}
        let outCol = ${channelsLast ? "coords[2];" : "coords[3];"}
        var acc : f32 = 0.0;
        for (var row = 0; row < uniforms.filterDims[0]; row = row + 1) {
          for (var col = 0; col < uniforms.filterDims[1]; col = col + 1) {
            let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * row - uniforms.pad[0];
            let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * col - uniforms.pad[1];
            for (var xChannel = 0; xChannel < ${channelsLast ? "uniforms.xShape[3];" : "uniforms.xShape[1];"} xChannel = xChannel + 1) {
              ${channelsLast ? "let v = readInp(batch, xRow, xCol, xChannel);" : "let v = readInp(batch, xChannel, xRow, xCol);"}
              let f = readFilt(row, col, xChannel, outChannel);
              acc = acc + v * f;
            }
          }
        }
        writeResult(batch, outRow, outCol, outChannel, acc);
      }
    `;
  }
};
inChannels : i32,`,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.isChannelsLast=t,this.shaderKey=`im2col_${this.isChannelsLast}`}getUserCode(){let e=this.isChannelsLast?1:2,t=this.isChannelsLast?2:3,o=this.isChannelsLast?"coords[1]":"coords[2]",n=this.isChannelsLast?"coords[2]":"coords[1]",s=this.isChannelsLast?"getX(batch, xRow, xCol, ch)":"getX(batch, ch, xRow, xCol)";return`
${Q("index")} {
2022-11-20 22:20:02 +01:00
let coords = getCoordsFromIndex(index);
if(index < uniforms.size) {
let batch = coords[0];
let row = ${o};
let col = ${n};
let offsetY = (row / uniforms.outWidth) * uniforms.stride[0] - uniforms.pad[0];
let xRow = offsetY + uniforms.dilation[0] * (col / uniforms.itemsPerBlockRow);
var value = 0.0;
if(xRow < uniforms.xShape[${e}] && xRow >= 0) {
let offsetX = (row % uniforms.outWidth) * uniforms.stride[1] -
uniforms.pad[1];
let xCol = offsetX + uniforms.dilation[1] * ((col %
uniforms.itemsPerBlockRow) / uniforms.inChannels);
let ch = col % uniforms.inChannels;
if(xCol < uniforms.xShape[${t}] && xCol >= 0) {
value = ${s};
}
}
setOutputAtIndex(index, value);
}
}
2023-01-06 19:23:06 +01:00
/**
 * Computes the shape a bias / prelu-weights tensor must be reshaped to so it
 * lines up with a conv2d output that has had its spatial dims flattened.
 *
 * @param {number[]} r original shape.
 * @param {boolean} e true for channels-last layout.
 * @returns {number[] | null}
 *   - rank >= 3, channels-last:  [...leading, H * W, C]
 *   - rank >= 3, channels-first: [...leading, C, H * W]
 *   - rank 1, channels-first, more than one element: [n, 1]
 *   - otherwise null (no reshape needed).
 */
function Yg(r, e) {
  const rank = r.length;

  if (rank >= 3) {
    const leading = r.slice(0, -3);
    const third = r[rank - 3];
    const second = r[rank - 2];
    const last = r[rank - 1];
    if (e) {
      return [...leading, third * second, last];
    }
    return [...leading, third, second * last];
  }

  if (!e && rank === 1 && r[0] > 1) {
    return [r[0], 1];
  }

  return null;
}
U=Yg(s.shape,C);U!=null&&(s=me({inputs:{x:s},backend:o,attrs:{shape:U}}),D.push(s))}if(n!=null){let U=Yg(n.shape,C);U!=null&&(n=me({inputs:{x:n},backend:o,attrs:{shape:U}}),D.push(n))}let V=Xu({a:C?R:P,b:C?P:R,transposeA:!C,transposeB:!1,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a}),z=me({inputs:{x:V},backend:o,attrs:{shape:t.outShape}});D.push(V);for(let U of D)o.disposeData(U.dataId);return z}function Qg({x:r,filter:e,convInfo:t,backend:o,bias:n=null,preluActivationWeights:s=null,leakyreluAlpha:a=0,activation:i=null}){let p=n!=null,u=s!=null,c=t.dataFormat==="channelsLast",l=c&&t.filterHeight===t.inHeight&&t.filterWidth===t.inWidth&&t.padInfo.type==="VALID",m=O().getBool("WEBGPU_USE_NAIVE_CONV2D_DEBUG");if(!m&&(l||t.filterHeight===1&&t.filterWidth===1&&t.dilationHeight===1&&t.dilationWidth===1&&t.strideHeight===1&&t.strideWidth===1&&(t.padInfo.type==="SAME"||t.padInfo.type==="VALID")))return moe({x:r,filter:e,convInfo:t,backend:o,bias:n,activation:i,preluActivationWeights:s,leakyreluAlpha:a});let d=O().getNumber("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL"),f=d>0?d:o.thresholdToIncreaseWorkgroups,h=t.batchSize*Math.ceil(t.outHeight*t.outWidth/32)*Math.ceil(t.outChannels/32);if(O().getBool("WEBGPU_CONV_SEPARATE_IM2COL_SHADER")||h<=f)return doe({x:r,filter:e,convInfo:t,backend:o,bias:n,preluActivationWeights:s,leakyreluAlpha:a,activation:i});let g,x=[t.padInfo.top,t.padInfo.left],b=[{type:"int32",data:[t.filterHeight,t.filterWidth]},{type:"int32",data:[...x]},{type:"int32",data:[t.strideHeight,t.strideWidth]},{type:"int32",data:[t.dilationHeight,t.dilationWidth]}];if(m)g=new jg(t,p,i,u);else{let _=c?t.outHeight*t.outWidth:t.outChannels,E=c?t.outChannels:t.outHeight*t.outWidth,A=t.filterHeight*t.filterWidth*t.inChannels;b.push({type:"int32",data:[_]},{type:"int32",data:[E]},{type:"int32",data:[A]});let R=o.adapterInfo.isIntel();g=new qg(t,_,E,A,p,i,u,R)}let 
C=[],w=[r,e];p&&(!c&&n.shape.length===1&&(n=me({inputs:{x:n},backend:o,attrs:{shape:[n.shape[0],1,1]}}),C.push(n)),w.push(n)),u&&(!c&&s.shape.length===1&&(s=me({inputs:{x:s},backend:o,attrs:{shape:[s.
${Q("index")} {
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let batch = coords[0];
let d1 = coords[${o}];
let dyCorner = vec2<i32>(coords[${e}], coords[${t}]) - uniforms.pads;
let dyRCorner = dyCorner.x;
let dyCCorner = dyCorner.y;
// Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
for (var wR = 0; wR < uniforms.filterDims.x; wR = wR + 1) {
let dyR = (f32(dyRCorner) + f32(wR)) / f32(uniforms.stride.x);
let wRPerm = uniforms.filterDims.x - 1 - wR;
if (dyR < 0.0 || dyR >= f32(uniforms.outBackprop[1]) || fract(dyR) > 0.0 ||
wRPerm < 0) {
continue;
}
let idyR = i32(dyR);
for (var wC = 0; wC < uniforms.filterDims.y; wC = wC + 1) {
let dyC = (f32(dyCCorner) + f32(wC)) / f32(uniforms.stride.y);
let wCPerm = uniforms.filterDims.y - 1 - wC;
if (dyC < 0.0 || dyC >= f32(uniforms.outBackprop[2]) ||
fract(dyC) > 0.0 || wCPerm < 0) {
continue;
}
let idyC = i32(dyC);
for (var d2 = 0; d2 < uniforms.outBackprop[3]; d2 = d2 + 1) {
if (${this.isChannelsLast}) {
let xValue = getDy(batch, idyR, idyC, d2);
let wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd = dotProd + xValue * wValue;
} else {
let xValue = getDy(batch, d2, idyR, idyC);
let wValue = getW(wRPerm, wCPerm, d1, d2);
dotProd = dotProd + xValue * wValue;
}
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}},Jg=class{constructor(e){this.variableNames=["x","dy"],this.uniforms="pad : vec2<i32>, stride : vec2<i32>, batchSize : i32, outHeight : i32, outWidth : i32, inHeight : i32, inWidth : i32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e.filterShape,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.isChannelsLast=e.dataFormat==="channelsLast",this.shaderKey=`conv2DDerFilter_${this.isChannelsLast}`}getUserCode(){return`
${Q("index")} {
if(index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let wR = coords[0];
let wC = coords[1];
let d1 = coords[2];
let d2 = coords[3];
// Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
// ? = to be determined. : = across all values in that axis.
var dotProd = 0.0;
for (var b = 0; b < uniforms.batchSize; b = b + 1) {
for (var yR = 0; yR < uniforms.outHeight; yR = yR + 1) {
let xR = wR + yR * uniforms.stride[0] - uniforms.pad[0];
if (xR < 0 || xR >= uniforms.inHeight) {
continue;
}
for (var yC = 0; yC < uniforms.outWidth; yC = yC + 1) {
let xC = wC + yC * uniforms.stride[1] - uniforms.pad[1];
if (xC < 0 || xC >= uniforms.inWidth) {
continue;
}
if (${this.isChannelsLast}) {
let dyValue = getDy(b, yR, yC, d2);
let xValue = getX(b, xR, xC, d1);
dotProd = dotProd + xValue * dyValue;
} else {
let dyValue = getDy(b, d2, yR, yC);
let xValue = getX(b, d1, xR, xC);
dotProd = dotProd + xValue * dyValue;
}
}
}
}
setOutputAtIndex(index, dotProd);
}
}
`}};function hoe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,dy:s}=e,{strides:a,pad:i,dataFormat:p,dimRoundingMode:u,filterShape:c}=o,l=S.convertConv2DDataFormat(p),m=S.computeConv2DInfo(n.shape,c,a,1,i,u,!1,l),d=new Jg(m),f=[{type:"int32",data:[m.padInfo.top,m.padInfo.left]},{type:"int32",data:[m.strideHeight,m.strideWidth]},{type:"int32",data:[m.batchSize]},{type:"int32",data:[m.outHeight]},{type:"int32",data:[m.outWidth]},{type:"int32",data:[m.inHeight]},{type:"int32",data:[m.inWidth]}];return t.runWebGPUProgram(d,[n,s],n.dtype,f)}var nL={kernelName:oi,backendName:"webgpu",kernelFunc:hoe};function goe(r=4){let e=s=>{switch(s){case 1:return"return W[getIndexFromCoords4D(coord, uniforms.wShape)];";case 4:return`
2022-11-18 17:13:29 +01:00
let coord1 = vec4<i32>(coordX, coordY, col + 1, rowInner);
let coord2 = vec4<i32>(coordX, coordY, col + 2, rowInner);
let coord3 = vec4<i32>(coordX, coordY, col + 3, rowInner);
let v0 = W[getIndexFromCoords4D(coord, uniforms.wShape)];
let v1 = W[getIndexFromCoords4D(coord1, uniforms.wShape)];
let v2 = W[getIndexFromCoords4D(coord2, uniforms.wShape)];
let v3 = W[getIndexFromCoords4D(coord3, uniforms.wShape)];
return vec4<f32>(v0, v1, v2, v3);
`;default:throw new Error(`innerElementSize ${s} is not supported.`)}},o=`if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
${`
let outRow = row / uniforms.outShape[2];
let outCol = row % uniforms.outShape[2];
let WRow = col / (uniforms.filterDims[1] * uniforms.outBackprop[3]);
let WCol = col / uniforms.outBackprop[3] % uniforms.filterDims[1];
let xR = f32(outRow - uniforms.pads[0] + WRow) / f32(uniforms.stride[0]);
let xC = f32(outCol - uniforms.pads[1] + WCol) / f32(uniforms.stride[1]);
if (xR < 0.0 || xR >= f32(uniforms.outBackprop[1]) || fract(xR) > 0.0) {
2022-11-20 22:20:02 +01:00
return ${kt(r)}(0.0);
2022-11-18 17:13:29 +01:00
}
if (xC < 0.0 || xC >= f32(uniforms.outBackprop[2]) || fract(xC) > 0.0) {
2022-11-20 22:20:02 +01:00
return ${kt(r)}(0.0);
2022-11-18 17:13:29 +01:00
}
let coord = vec4<i32>(
batch,
i32(xR),
i32(xC),
col % uniforms.outBackprop[3]);
return x[getIndexFromCoords4D(coord, uniforms.xShape)/${r}];`}
}
2022-11-20 22:20:02 +01:00
return ${kt(r)}(0.0);`;return`
fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${kt(r)} {
2022-11-18 17:13:29 +01:00
let col = colIn * ${r};
${o}
}
2022-11-20 22:20:02 +01:00
fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${kt(r)} {
2022-11-18 17:13:29 +01:00
let col = colIn * ${r};
let coordX = uniforms.filterDims.x - 1 -
row / (uniforms.filterDims[1] * uniforms.outBackprop[3]);
let coordY = uniforms.filterDims.y - 1 -
(row / uniforms.outBackprop[3]) % uniforms.filterDims[1];
if (row < uniforms.dimInner && col < uniforms.dimBOuter &&
coordX >= 0 && coordY >= 0) {
let rowInner = row % uniforms.outBackprop[3];
let coord = vec4<i32>(coordX, coordY, col, rowInner);
${e(r)}
}
2022-11-20 22:20:02 +01:00
return ${kt(r)}(0.0);
2022-11-18 17:13:29 +01:00
}
2022-11-20 22:20:02 +01:00
fn mm_write(batch: i32, row : i32, colIn : i32, valueInput : ${kt(r)}) {
2022-11-18 17:13:29 +01:00
let col = colIn * ${r};
if (row < uniforms.dimAOuter && (col + ${r-1}) < uniforms.dimBOuter) {
var value = valueInput;
let outCoord = vec4<i32>(
batch,
row / uniforms.outShape[2],
row % uniforms.outShape[2],
col);
result[getIndexFromCoords4D(outCoord, uniforms.outShape)/${r}] = value;
}
2023-01-06 19:23:06 +01:00
}`}var ex=class{constructor(e){this.variableNames=["x","W"],this.uniforms="filterDims : vec2<i32>, pads : vec2<i32>, stride : vec2<i32>, outBackprop : vec4<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,",this.outputShape=e.inShape,y.assert(e.dataFormat==="channelsLast",()=>"TODO: NCHW is unimplemented"),this.isVec4=e.inChannels%4===0&&e.outChannels%4===0,this.dispatchLayout={x:[3],y:[1,2],z:[0]},this.workgroupSize=Bl(this.dispatchLayout,this.outputShape,this.isVec4),this.elementsPerThread=Vl(this.dispatchLayout,this.outputShape,this.isVec4),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,this.elementsPerThread),this.isVec4&&(this.variableTypes=["vec4<f32>","f32"]),this.shaderKey=`conv2DDerInputMM_${this.isVec4}_${this.elementsPerThread}`}getUserCode(){let e=this.isVec4?qu(this.elementsPerThread,this.workgroupSize):ju(this.elementsPerThread,this.workgroupSize);return`
${goe(this.isVec4?4:1)}
2022-11-18 17:13:29 +01:00
${e}
2023-01-06 19:23:06 +01:00
`}};function xoe(r){let{inputs:e,backend:t,attrs:o}=r,{dy:n,filter:s}=e,{inputShape:a,strides:i,pad:p,dataFormat:u,dimRoundingMode:c}=o,l=S.convertConv2DDataFormat(u),m=S.computeConv2DInfo(a,s.shape,i,1,p,c,!1,l),d=[{type:"int32",data:[m.filterHeight,m.filterWidth]},{type:"int32",data:[m.filterHeight-1-m.padInfo.top,m.filterWidth-1-m.padInfo.left]},{type:"int32",data:[m.strideHeight,m.strideWidth]},{type:"int32",data:[m.batchSize,m.outHeight,m.outWidth,m.outChannels]}],f;if(O().getBool("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE")||m.filterHeight<=2&&m.filterWidth<=2&&m.outChannels<=16&&m.inChannels===1)f=new Zg(m);else{f=new ex(m);let h=m.inHeight*m.inWidth,g=m.inChannels,x=m.filterHeight*m.filterWidth*m.outChannels;d.push({type:"uint32",data:[h]},{type:"uint32",data:[g]},{type:"uint32",data:[x]})}return t.runWebGPUProgram(f,[n,s],"float32",d)}var sL={kernelName:Go,backendName:"webgpu",kernelFunc:xoe};var yoe=xe({opType:X.COS}),aL={kernelName:Ho,backendName:"webgpu",kernelFunc:yoe};var boe=xe({opType:X.COSH}),iL={kernelName:Ko,backendName:"webgpu",kernelFunc:boe};var tx=class{constructor(e,t,o,n){this.variableNames=["Image","Boxes","BoxInd"],this.uniforms="extrapolationValue : f32,",this.workgroupSize=[64,1,1],this.size=!0;let[s]=t;this.outputShape=[s,o[0],o[1],e],this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.methodId=n==="bilinear"?1:0,this.cropHeightBiggerThan1=this.outputShape[1]>1,this.cropWidthBiggerThan1=this.outputShape[2]>1,this.shaderKey=`cropAndResize_${this.methodId}_${this.cropHeightBiggerThan1}_${this.cropWidthBiggerThan1}`}getUserCode(){let[e,t]=["f32(uniforms.imageShape[1] - 1)","f32(uniforms.imageShape[2] - 1)"],[o,n,s]=this.cropHeightBiggerThan1?[`(${e} / f32(uniforms.outShape[1] - 1))`,"(y2-y1) * height_ratio",`y1*${e} + f32(y)*(height_scale)`]:["0.0","0.0",`0.5 * (y1+y2) * ${e}`],[a,i,p]=this.cropWidthBiggerThan1?[`(${t} / f32(uniforms.outShape[2] - 1))`,"(x2-x1) * 
width_ratio",`x1*${t} + f32(x)*(width_scale)`]:["0.0","0.0",`0.5 * (x1+x2) * ${t}`];return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let height_ratio = f32(${o});
let width_ratio = f32(${a});
let b = coords[0];
let y = coords[1];
let x = coords[2];
let d = coords[3];
// get box vals
let y1 = getBoxes(b, 0);
let x1 = getBoxes(b, 1);
let y2 = getBoxes(b, 2);
let x2 = getBoxes(b, 3);
// get image in batch index
let bInd = i32(round(getBoxInd(b)));
if(bInd < 0 || bInd >= uniforms.outShape[0]) {
return;
}
let height_scale = ${n};
let width_scale = ${i};
let in_y = ${s};
if( in_y < 0.0 || in_y > ${e} ) {
setOutputAtIndex(index, uniforms.extrapolationValue);
return;
}
let in_x = ${p};
if( in_x < 0.0 || in_x > ${t} ) {
setOutputAtIndex(index, uniforms.extrapolationValue);
return;
}
let sourceFracIndexCR = vec2<f32>(in_x,in_y);
if(${this.methodId} == 1) {
// Compute the four integer indices.
let sourceFloorCR = vec2<i32>(sourceFracIndexCR);
let sourceCeilCR = vec2<i32>(ceil(sourceFracIndexCR));
let topLeft = getImage(bInd, sourceFloorCR.y, sourceFloorCR.x, d);
let bottomLeft = getImage(bInd, sourceCeilCR.y, sourceFloorCR.x, d);
let topRight = getImage(bInd, sourceFloorCR.y, sourceCeilCR.x, d);
let bottomRight = getImage(bInd, sourceCeilCR.y, sourceCeilCR.x, d);
let fracCR = sourceFracIndexCR - vec2<f32>(sourceFloorCR);
let top = topLeft + (topRight - topLeft) * fracCR.x;
let bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR.x;
let newValue = top + (bottom - top) * fracCR.y;
setOutputAtIndex(index, newValue);
} else {
// Compute the coordinators of nearest neighbor point.
let sourceNearestCR = vec2<i32>(floor(
sourceFracIndexCR + vec2<f32>(0.5,0.5)));
let newValue = getImage(
bInd, sourceNearestCR.y, sourceNearestCR.x, d);
setOutputAtIndex(index, newValue);
}
}
}
2023-01-06 19:23:06 +01:00
`}};var Coe=r=>{let{inputs:e,backend:t,attrs:o}=r,{image:n,boxes:s,boxInd:a}=e,{cropSize:i,method:p,extrapolationValue:u}=o,c=new tx(n.shape[3],s.shape,i,p),l=[{type:"float32",data:[u]}];return t.runWebGPUProgram(c,[n,s,a],"float32",l)},uL={kernelName:Xo,backendName:"webgpu",kernelFunc:Coe};var Zu;(function(r){r.Prod="*",r.Sum="+"})(Zu||(Zu={}));var Kl=class{constructor(e,t,o,n){this.variableNames=["x"],this.uniforms="index : f32,",this.size=!0,this.workgroupSize=[128,1,1],this.outputShape=t,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.exclusive=o,this.reverse=n,this.op=e,this.shaderKey=`cum_${this.op}_${this.exclusive}_${this.reverse}`}getUserCode(){let e=this.outputShape.length,t=this.op===Zu.Prod?"1.0":"0.0",o=this.exclusive?t:`getX(${pL(e,"coords",this.op)})`,n=this.outputShape[this.outputShape.length-1],s="",a="";return this.exclusive?(s=this.reverse?`end != ${n-1}`:"end != 0",a=this.reverse?"end + 1":"end - 1"):(s=this.reverse?`end + pow2 < ${n}`:"end >= pow2",a=this.reverse?"end + pow2":"end - pow2"),`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
var coords = getCoordsFromIndex(index);
2023-01-06 19:23:06 +01:00
let end = ${cL(e,"coords",this.op)};
2022-11-18 17:13:29 +01:00
var val = ${o};
let pow2 = i32(pow(2.0, uniforms.index));
if (${s}) {
let idx = ${a};
2023-01-06 19:23:06 +01:00
${cL(e,"coords",this.op)} = idx;
val ${this.op}= getX(${pL(e,"coords",this.op)});
2022-11-18 17:13:29 +01:00
}
setOutputAtIndex(index, val);
}
}
2023-01-06 19:23:06 +01:00
`}};function pL(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.x, ${e}.y`;if(r===3)return`${e}.x, ${e}.y, ${e}.z`;if(r===4)return`${e}.x, ${e}.y, ${e}.z, ${e}.w`;throw Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function cL(r,e,t){if(r===1)return`${e}`;if(r===2)return`${e}.y`;if(r===3)return`${e}.z`;if(r===4)return`${e}.w`;throw Error(`Cumulative ${t} for rank ${r} is not yet supported`)}function rx(r,e,t,o,n,s){let a=e.shape.length,i=S.getAxesPermutation([o],a),p=e;i!=null&&(p=Nr({inputs:{x:e},backend:t,attrs:{perm:i}}));let u=S.getInnerMostAxes(1,a)[0];if(u!==a-1)throw new Error(`WebGPU cumprod shader expects an inner-most axis=${e.shape.length-1} but got axis=${o}`);let c=p.shape[u],l=Dt({inputs:{x:p},backend:t});for(let m=0;m<=Math.ceil(Math.log2(c))-1;m++){let d=new Kl(r,p.shape,!1,s),f=l,h=[{type:"float32",data:[m]}];l=t.runWebGPUProgram(d,[l],l.dtype,h),t.disposeData(f.dataId)}if(n){let m=new Kl(r,p.shape,n,s),d=l,f=[{type:"float32",data:[0]}];l=t.runWebGPUProgram(m,[l],l.dtype,f),t.disposeData(d.dataId)}if(i!=null){let m=S.getUndoAxesPermutation(i),d=Nr({inputs:{x:l},backend:t,attrs:{perm:m}});return t.disposeData(l.dataId),t.disposeData(p.dataId),d}return l}function Soe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return rx(Zu.Prod,n,t,s,a,i)}var lL={kernelName:qo,backendName:"webgpu",kernelFunc:Soe};function woe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,exclusive:a,reverse:i}=o;return rx(Zu.Sum,n,t,s,a,i)}var mL={kernelName:jo,backendName:"webgpu",kernelFunc:woe};function Ioe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,weights:s}=e,{size:a,binaryOutput:i}=o,p=n.shape.length===1,c=y.sizeFromShape(s.shape)>0,l=s.dtype,m=p?[n.shape[0]]:[n.shape[0],n.shape[1]],d=p?[a]:[n.shape[0],a],f=fr({backend:t,attrs:{shape:d,value:0,dtype:l}}),h=new Tc(m,c,i),g=[{type:"int32",data:[a]}],x=c?[n,s]:[n];return t.runWebGPUProgram(h,x,l,g,f)}var dL={kernelName:ni,backendName:"webgpu",kernelFunc:Ioe};var 
ox=class{constructor(e,t){this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.uniforms="blockSize : i32,",this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey=`depthToSpace_${t}`,this.dataFormat=t}getUserCode(){return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let b = coords[0];
let h = ${this.getHeightCoordString()};
let w = ${this.getWidthCoordString()};
let d = ${this.getDepthCoordString()};
let in_h = h / uniforms.blockSize;
let offset_h = h % uniforms.blockSize;
let in_w = w / uniforms.blockSize;
let offset_w = w % uniforms.blockSize;
let offset_d = (offset_h * uniforms.blockSize + offset_w) *
${this.getOutputDepthSize()};
let in_d = d + offset_d;
let rlt = ${this.getInputSamplingString()};
setOutputAtIndex(index, rlt);
}
2023-01-06 19:23:06 +01:00
}`}getHeightCoordString(){return this.dataFormat==="NHWC"?"coords[1]":"coords[2]"}getWidthCoordString(){return this.dataFormat==="NHWC"?"coords[2]":"coords[3]"}getDepthCoordString(){return this.dataFormat==="NHWC"?"coords[3]":"coords[1]"}getOutputDepthSize(){return this.dataFormat==="NHWC"?"uniforms.outShape[3]":"uniforms.outShape[1]"}getInputSamplingString(){return this.dataFormat==="NHWC"?"getX(b, in_h, in_w, in_d)":"getX(b, in_d, in_h, in_w)"}};function voe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockSize:s,dataFormat:a}=o,i=n.shape[0],p=a==="NHWC"?n.shape[1]:n.shape[2],u=a==="NHWC"?n.shape[2]:n.shape[3],c=a==="NHWC"?n.shape[3]:n.shape[1],l=p*s,m=u*s,d=c/(s*s),f=a==="NHWC"?[i,l,m,d]:[i,d,l,m],h=[{type:"int32",data:[s]}],g=new ox(f,a);return t.runWebGPUProgram(g,[n],n.dtype,h)}var fL={kernelName:Yo,backendName:"webgpu",kernelFunc:voe};var nx=class{constructor(e,t,o,n=!1,s=null,a=!1){this.variableNames=["x","W"],this.uniforms="pad : vec2<i32>, inDims : vec2<i32>,",this.workgroupSize=[16,16,1],this.outputShape=e,this.dispatchLayout={x:[3],y:[2],z:[0,1]},this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),n&&this.variableNames.push("bias"),a&&this.variableNames.push("preluActivationWeights"),this.addBias=n,this.activation=s,this.hasPreluActivation=a,this.filterHeight=t,this.filterWidth=o,this.shaderKey=`depthwiseNCHW_${this.activation}_${this.filterHeight}_${this.filterWidth}`}getUserCode(){let e=this.filterWidth*this.filterHeight,t=this.workgroupSize[0]*this.workgroupSize[1]*this.workgroupSize[2],o=this.workgroupSize[1]+this.filterHeight-1,n=this.workgroupSize[0]+this.filterWidth-1;return`
${pr(this.activation,this.hasPreluActivation,!1,4)}
2022-11-18 17:13:29 +01:00
var<workgroup> mm_Asub : array<array<f32, ${n}>, ${o}>;
var<workgroup> mm_Bsub : array<array<f32, ${this.filterWidth}>, ${this.filterHeight}>;
fn readX(batch : i32, channel : i32, row : i32, col : i32) -> f32 {
var value = 0.0;
if (row >=0 && row < uniforms.inDims[0] && col >=0 && col < uniforms.inDims[1])
{
value = getX(batch, channel, row, col);
}
return value;
}
2023-01-06 19:23:06 +01:00
${Q()} {
2022-11-18 17:13:29 +01:00
let coords = getOutputCoords();
let batch = coords[0];
let xRCCorner = vec2<i32>(coords.zw) - uniforms.pad;
let channelMul = uniforms.wShape[3];
let d1 = coords[1] / channelMul;
let q = coords[1] % channelMul;
let inputRowStart = xRCCorner.x;
let inputColStart = xRCCorner.y;
let localRow = i32(localId.y);
let localCol = i32(localId.x);
// Load one tile of X into local memory.
2022-11-20 22:20:02 +01:00
for (var inputRow = localRow; inputRow < ${o}; inputRow = inputRow + ${this.workgroupSize[1]}) {
for (var inputCol = localCol; inputCol < ${n}; inputCol = inputCol + ${this.workgroupSize[0]}) {
2022-11-18 17:13:29 +01:00
let rowOffset = inputRow - localRow;
let colOffset = inputCol - localCol;
mm_Asub[inputRow][inputCol] = readX(batch, d1, inputRowStart + rowOffset, inputColStart + colOffset);
}
}
// Load one tile of W into local memory.
2022-11-20 22:20:02 +01:00
var wIndex = i32(localIndex);
2022-11-18 17:13:29 +01:00
${e<t?`if (wIndex < ${e})`:`for(; wIndex < ${e}; wIndex = wIndex + ${t})`}
{
let wRow = wIndex / ${this.filterWidth};
let wCol = wIndex % ${this.filterWidth};
mm_Bsub[wRow][wCol] = getW(wRow, wCol, d1, q);
}
workgroupBarrier();
var value = 0.0;
for (var wR = 0; wR < ${this.filterHeight}; wR = wR + 1) {
for (var wC = 0; wC < ${this.filterWidth}; wC = wC + 1) {
let xVal = mm_Asub[localRow + wR][localCol + wC];
let wVal = mm_Bsub[wR][wC];
value = fma(xVal, wVal, value);
}
}
2023-01-06 19:23:06 +01:00
${Gr(this.addBias,this.activation)}
2022-11-18 17:13:29 +01:00
if (coordsInBounds4D(coords, uniforms.outShape)) {
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
2023-01-06 19:23:06 +01:00
`}};var $c=class{constructor(e,t=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms="pad : vec2<i32>, inDims : vec2<i32>,",this.workgroupSize=[4,4,4],this.workPerThread=4,this.isVec4=!0,this.outputShape=e.outShape,this.dispatchLayout={x:[3],y:[2],z:[0,1]},this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[4,this.workPerThread,1]),y.assert(e.dataFormat==="channelsLast",()=>"TODO: NCHW is unimplemented"),t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.convInfo=e,this.addBias=t,this.activation=o,this.hasPreluActivation=n,this.shaderKey=`depthwiseVec4_${o}_${this.convInfo.filterHeight}_${this.convInfo.filterWidth}_${this.convInfo.strideHeight}_${this.convInfo.strideWidth}_${this.workPerThread}`}getUserCode(){let e=(this.workPerThread-1)*this.convInfo.strideWidth+this.convInfo.filterWidth,t=this.convInfo.strideHeight,o=this.convInfo.strideWidth;return`
${pr(this.activation,this.hasPreluActivation,!0,4)}
2022-11-18 17:13:29 +01:00
fn readX(batch : i32, row : i32, col : i32, channel : i32) -> vec4<f32> {
var value = vec4<f32>(0.0);
if (col >=0 && col < uniforms.inDims[1]) {
value = getX(batch, row, col, channel);
}
return value;
}
2023-01-06 19:23:06 +01:00
${Q()} {
2022-11-18 17:13:29 +01:00
let batch = i32(globalId.z) / uniforms.outShape[1];
let r = i32(globalId.z) % uniforms.outShape[1];
let c = i32(globalId.y) * ${this.workPerThread};
let d1 = i32(globalId.x) * 4;
2023-01-06 19:23:06 +01:00
let xRCCorner = vec2<i32>(r, c) * vec2<i32>(${t}, ${o}) - uniforms.pad;
2022-11-18 17:13:29 +01:00
let xRCorner = xRCCorner.x;
let xCCorner = xRCCorner.y;
var xVals : array<vec4<f32>, ${e}>;
var dotProd : array<vec4<f32>, ${this.workPerThread}>;
for (var i = 0; i < ${this.workPerThread}; i++) {
dotProd[i] = vec4<f32>(0.0);
}
// Use constant instead of uniform can give better performance.
for (var wR = 0; wR < ${this.convInfo.filterHeight}; wR = wR + 1) {
let xR = xRCorner + wR;
if (xR >=0 && xR < uniforms.inDims[0]) {
for (var i = 0; i < ${e}; i++) {
xVals[i] = readX(batch, xR, xCCorner + i, d1);
}
for (var wC = 0; wC < ${this.convInfo.filterWidth}; wC = wC + 1) {
let wValue = getW(wR, wC, d1, 0);
for (var i = 0; i < ${this.workPerThread}; i++) {
2023-01-06 19:23:06 +01:00
dotProd[i] = fma(xVals[i * ${o} + wC], wValue, dotProd[i]);
2022-11-18 17:13:29 +01:00
}
}
}
}
for (var i = 0; i < ${this.workPerThread}; i = i + 1) {
let coords = vec4<i32>(batch, r, c + i, d1);
if (coordsInBounds4D(coords, uniforms.outShape)) {
var value = dotProd[i];
2023-01-06 19:23:06 +01:00
${Gr(this.addBias,this.activation)}
2022-11-18 17:13:29 +01:00
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
}
2023-01-06 19:23:06 +01:00
`}};var Ec=class{constructor(e,t=!1,o=null,n=!1){this.variableNames=["x","W"],this.uniforms=`pad : vec2<i32>, inDims : vec2<i32>, filterHeight : i32,
filterWidth : i32, stride : vec2<i32>, dilation : vec2<i32>,`,this.workgroupSize=[256,1,1],this.size=!0,this.outputShape=e.outShape,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.isChannelsLast=e.dataFormat==="channelsLast",t&&this.variableNames.push("bias"),n&&this.variableNames.push("preluActivationWeights"),this.convInfo=e,this.addBias=t,this.activation=o,this.hasPreluActivation=n,this.shaderKey=`depthwise_${this.activation}_${this.isChannelsLast}`}getUserCode(){let e=this.isChannelsLast?"getX(batch, xR, xC, d1);":"getX(batch, d1, xR, xC);";return`
${pr(this.activation,this.hasPreluActivation,!1,4)}
2022-11-18 17:13:29 +01:00
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-20 22:20:02 +01:00
if (index < uniforms.size) {
let coords = getOutputCoords();
let batch = coords[0];
let xRCCorner = vec2<i32>(coords.${this.isChannelsLast?"yz":"zw"}) * uniforms.stride - uniforms.pad;
let d2 = coords[${this.isChannelsLast?3:1}];
let channelMul = uniforms.wShape[3];
let d1 = d2 / channelMul;
let q = d2 % channelMul;
let inputRowStart = xRCCorner.x;
let inputColStart = xRCCorner.y;
let inputRowEnd = inputRowStart + uniforms.filterHeight *
uniforms.dilation[0];
let inputColEnd = inputColStart + uniforms.filterWidth *
uniforms.dilation[1];
// Convolve x(?, ?, d1)|x(d1, ?, ?) with w(:, :, d1, q) to get
// y(yR, yC, d2)|y(d2, yR, yC). ? = to be determined. : = across all
// values in that axis. x(?, ?, d1) and y(yR, yC, d2) is for NHWC.
// x(d1, ?, ?) and y(d2, yR, yC) is for NCHW.
var value = 0.0;
// Extract if checking out of for loop for performance.
if (inputRowStart >= 0 && inputColStart >= 0 &&
inputRowEnd < uniforms.inDims[0] &&
inputColEnd < uniforms.inDims[1]) {
for (var wR = 0; wR < uniforms.filterHeight; wR = wR + 1) {
let xR = inputRowStart + wR * uniforms.dilation[0];
for (var wC = 0; wC < uniforms.filterWidth; wC = wC + 1) {
let xC = inputColStart + wC * uniforms.dilation[1];
let xVal = ${e};
let wVal = getW(wR, wC, d1, q);
value = value + xVal * wVal;
}
}
} else {
for (var wR = 0; wR < uniforms.filterHeight; wR = wR + 1) {
let xR = inputRowStart + wR * uniforms.dilation[0];
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
if (xR < 0 || xR >= uniforms.inDims[0]) {
continue;
}
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
for (var wC = 0; wC < uniforms.filterWidth; wC = wC + 1) {
let xC = inputColStart + wC * uniforms.dilation[1];
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
if (xC < 0 || xC >= uniforms.inDims[1]) {
continue;
}
2022-11-18 17:13:29 +01:00
2022-11-20 22:20:02 +01:00
let xVal = ${e};
let wVal = getW(wR, wC, d1, q);
value = value + xVal * wVal;
}
2022-11-18 17:13:29 +01:00
}
}
2023-01-06 19:23:06 +01:00
${Gr(this.addBias,this.activation)}
2022-11-20 22:20:02 +01:00
setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
}
}
2023-01-06 19:23:06 +01:00
`}};function koe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dataFormat:p,dilations:u,dimRoundingMode:c}=o,l=S.convertConv2DDataFormat(p),m=u;m==null&&(m=[1,1]);let d=S.computeConv2DInfo(n.shape,s.shape,a,m,i,c,!0,l),f=[{type:"int32",data:[d.padInfo.top,d.padInfo.left]},{type:"int32",data:[d.inHeight,d.inWidth]}],h=d.dataFormat==="channelsLast",g;return!h&&d.inHeight>16&&d.inWidth>16&&d.strideHeight===1&&d.strideWidth===1&&d.dilationWidth===1&&d.dilationHeight===1&&d.inChannels===d.outChannels?g=new nx(d.outShape,d.filterHeight,d.filterWidth):h&&d.outHeight>4&&d.outWidth>4&&d.strideWidth<=2&&d.inChannels===d.outChannels&&d.dilationHeight===1&&d.dilationWidth===1&&d.inChannels%4===0?g=new $c(d):(g=new Ec(d),f.push({type:"int32",data:[d.filterHeight]},{type:"int32",data:[d.filterWidth]},{type:"int32",data:[d.strideHeight,d.strideWidth]},{type:"int32",data:[d.dilationHeight,d.dilationWidth]})),t.runWebGPUProgram(g,[n,s],n.dtype,f)}var hL={kernelName:Qo,backendName:"webgpu",kernelFunc:koe};var sx=class{constructor(e){this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[e,e],this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="diag"}getUserCode(){return`
${Q("index")} {
if (index < uniforms.size) {
let coords = getOutputCoords();
let value = select(0.0, getX(coords[0]), coords[0] == coords[1]);
setOutputAtIndex(index, value);
}
}
`}};function Noe(r){let{inputs:e,backend:t}=r,{x:o}=e,n=[...o.shape,...o.shape],s=y.sizeFromShape(o.shape),a=me({inputs:{x:o},backend:t,attrs:{shape:[s]}}),i=new sx(s),p=t.runWebGPUProgram(i,[a],a.dtype),u=me({inputs:{x:p},backend:t,attrs:{shape:n}});return t.disposeData(a.dataId),t.disposeData(p.dataId),u}var gL={kernelName:si,backendName:"webgpu",kernelFunc:Noe};var ax=class{constructor(e){this.variableNames=["x","w"],this.uniforms="filterDims: vec2<i32>, pad: vec2<i32>, stride: vec2<i32>, dilation: vec2<i32>",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e.outShape,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="dilation2d"}getUserCode(){return`
${Q("index")} {
if (index < uniforms.size) {
let neg_infinity = -3.4e38;
let coords = getOutputCoords();
let batch = coords.x;
let d1 = coords.w;
let outTopLeftCorner = coords.yz * uniforms.stride - uniforms.pad;
let hBeg = outTopLeftCorner.x;
let wBeg = outTopLeftCorner.y;
var curVal = neg_infinity;
for (var h = 0; h < uniforms.filterDims[0]; h = h + 1) {
let hIn = hBeg + h * uniforms.dilation[0];
if (hIn >= 0 && hIn < uniforms.xShape[1]) {
for (var w = 0; w < uniforms.filterDims[1]; w = w + 1) {
let wIn = wBeg + w * uniforms.dilation[1];
if (wIn >= 0 && wIn < uniforms.xShape[2]) {
let val = getX(batch, hIn, wIn, d1) + getW(h, w, d1);
if (val > curVal) {
curVal = val;
}
}
}
}
}
setOutputAtIndex(index, curVal);
}
}
`}};function Toe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s}=e,{strides:a,pad:i,dilations:p}=o,u=S.computeDilation2DInfo(n.shape,s.shape,a,i,"NHWC",p),c=[u.padInfo.top,u.padInfo.left],l=[{type:"int32",data:[u.filterHeight,u.filterWidth]},{type:"int32",data:[...c]},{type:"int32",data:[u.strideHeight,u.strideWidth]},{type:"int32",data:[u.dilationHeight,u.dilationWidth]}],m=new ax(u);return t.runWebGPUProgram(m,[n,s],n.dtype,l)}var xL={kernelName:ai,backendName:"webgpu",kernelFunc:Toe};var II=Ze({opType:ge.MUL,cpuKernelImpl:lM,supportsComplex:!0}),yL={kernelName:kn,backendName:"webgpu",kernelFunc:II};function ql(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return Kr(n,s,a,"sum",t)}var bL={kernelName:Kn,backendName:"webgpu",kernelFunc:ql};function _oe(r){let{inputs:e,backend:t,attrs:o}=r,{equation:n}=o,s=e,{allDims:a,summedDims:i,idDims:p}=S.decodeEinsumEquation(n,s.length);S.checkEinsumDimSizes(a.length,p,s);let{path:u,steps:c}=S.getEinsumComputePath(i,p),l=c.length,m=null,d=a.length,f=[];for(let h=0;h<l;++h){for(let g of c[h]){let{permutationIndices:x,expandDims:b}=S.getEinsumPermutation(d,p[g]),C;S.isIdentityPermutation(x)?C=s[g]:(C=Nr({inputs:{x:s[g]},backend:t,attrs:{perm:x}}),f.push(C));let w=C.shape.slice();for(let k=0;k<b.length;++k)w.splice(b[k],0,1);y.arraysEqual(C.shape,w)||(C=me({inputs:{x:C},backend:t,attrs:{shape:w}}),f.push(C)),m===null?m=C:(m=II({inputs:{a:C,b:m},backend:t}),f.push(m))}h<l-1&&(u[h]>=0&&(m=ql({inputs:{x:m},backend:t,attrs:{axis:u[h]-(a.length-d),keepDims:!1}}),f.push(m)),d--)}for(let h of f)h!==m&&t.disposeData(h.dataId);return m}var CL={kernelName:ii,backendName:"webgpu",kernelFunc:_oe};var $oe=xe({opType:X.ELU}),SL={kernelName:Jo,backendName:"webgpu",kernelFunc:$oe};var Eoe=Ze({opType:ge.EQUAL,dtype:"bool",cpuKernelImpl:Q3}),wL={kernelName:en,backendName:"webgpu",kernelFunc:Eoe};var Aoe=xe({opType:X.ERF}),IL={kernelName:da,backendName:"webgpu",kernelFunc:Aoe};var 
vI=xe({opType:X.EXP,cpuKernelImpl:Z3,dtype:"float32"}),vL={kernelName:tn,backendName:"webgpu",kernelFunc:vI};function ix(r){let{inputs:e,attrs:t,backend:o}=r,{dim:n}=t,{input:s}=e,a=s.shape.length,i=s.shape.slice(),p=n;return n<0&&(y.assert(-(a+1)<=n,()=>`Axis must be in the interval [${-(a+1)}, ${a}]`),p=a+n+1),i.splice(p,0,1),me({inputs:{x:s},backend:o,attrs:{shape:i}})}var kL={kernelName:Ss,backendName:"webgpu",kernelFunc:ix};var Roe=xe({opType:X.EXPM1,cpuKernelImpl:J3}),NL={kernelName:fa,backendName:"webgpu",kernelFunc:Roe};var jl=class{constructor(e,t){this.variableNames=["real","imag"],this.outputShape=[],this.uniforms="exponentMultiplier : f32, denominator: f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.component=e,this.shaderKey=`fft_${e}`}getUserCode(){return`
2022-11-20 22:20:02 +01:00
fn unaryOpComplex(real: f32, expR: f32, imag: f32, expI: f32) -> f32 {
${this.component==="real"?"return real * expR - imag * expI;":"return real * expI + imag * expR;"}
}
// Direct DFT sum for one output element: accumulates, over every position i of
// the row selected by batch, the chosen component of input[i] * exp(i * x),
// each term divided by uniforms.denominator.
fn mulMatDFT(batch: i32, index: i32) -> f32 {
  let indexRatio = f32(index) / f32(uniforms.realShape[1]);
  let exponentMultiplierTimesIndexRatio =
      uniforms.exponentMultiplier * indexRatio;

  var result = 0.0;

  for (var i = 0; i < uniforms.realShape[1]; i = i + 1) {
    // x = (-2|2 * PI / N) * index * i;
    let x = exponentMultiplierTimesIndexRatio * f32(i);
    let expR = cos(x);
    let expI = sin(x);
    let real = getReal(batch, i);
    let imag = getImag(batch, i);

    result = result +
        unaryOpComplex(real, expR, imag, expI) / uniforms.denominator;
  }
  return result;
}
// Entry point: one invocation per output element; coords[0] is the row
// (batch) and coords[1] the frequency index passed to mulMatDFT.
${Q("index")} {
  if (index < uniforms.size) {
    let coords = getOutputCoords();
    setOutputAtIndex(index, mulMatDFT(coords[0], coords[1]));
  }
}
`}};function ux(r,e,t){let o=t.tensorMap.get(r.dataId),n=y.sizeFromShape(r.shape),s=r.shape[r.shape.length-1],a=n/s,i=[],p=me({inputs:{x:r},backend:t,attrs:{shape:[a,s]}});i.push(p);let u=p.shape,c=new jl("real",u),l=new jl("imag",u),m=[{dataId:o.complexTensorInfos.real.dataId,dtype:o.complexTensorInfos.real.dtype,shape:u},{dataId:o.complexTensorInfos.imag.dataId,dtype:o.complexTensorInfos.imag.dtype,shape:u}],d=e?2*Math.PI:-2*Math.PI,f=e?u[1]:1,h=[{type:"float32",data:[d]},{type:"float32",data:[f]}],g=t.runWebGPUProgram(c,m,"float32",h);i.push(g);let x=t.runWebGPUProgram(l,m,"float32",h);i.push(x);let b=po({inputs:{real:g,imag:x},backend:t});i.push(b);let C=me({inputs:{x:b},backend:t,attrs:{shape:r.shape}});return i.forEach(w=>t.disposeData(w.dataId)),C}function Doe(r){let{inputs:e,backend:t}=r,{input:o}=e;return ux(o,!1,t)}var TL={kernelName:ui,backendName:"webgpu",kernelFunc:Doe};var px=class{constructor(e){this.outputShape=[],this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="flipLeftRight"}getUserCode(){return`
// Entry point: copies each element from the position mirrored along axis 2
// (coordX = xShape[2] - coords[2] - 1); the other three coordinates are kept.
${Q("index")} {
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    let coordX = uniforms.xShape[2] - coords[2] - 1;
    let outputValue = getX(coords[0], coords[1], coordX, coords[3]);
    setOutputAtIndex(index, outputValue);
  }
}
`}};var _L={kernelName:rn,backendName:"webgpu",kernelFunc:({inputs:r,backend:e})=>{let{image:t}=r,o=e,n=new px(t.shape);return o.runWebGPUProgram(n,[t],t.dtype)}};var Foe=xe({opType:X.FLOOR,cpuKernelImpl:eM}),$L={kernelName:on,backendName:"webgpu",kernelFunc:Foe};var Ooe=Ze({opType:ge.INT_DIV,dtype:"int32"}),EL={kernelName:nn,backendName:"webgpu",kernelFunc:Ooe};var cx=class{constructor(e,t,o=!1){this.isFromPixels=!0,this.outputShape=[0],this.variableNames=[],this.workgroupSize=[256,1,1],this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[t,1,1]),this.importVideo=o,this.shaderKey=`fromPixels_${this.importVideo}`}getUserCode(){let e=this.importVideo?"textureLoad(src, vec2<i32>(coords.yx));":"textureLoad(src, vec2<i32>(coords.yx), 0)";return`
// Source texture; its type depends on whether an external (video) texture is
// imported.
@binding(1) @group(0) var src: ${this.importVideo?"texture_external":"texture_2d<f32>"};

// Entry point: each invocation handles one pixel. It loads the texel at
// coords.yx and writes numChannels consecutive i32 values, each scaled by
// 255 and floored.
${Q("index")} {
  let flatIndex = index * uniforms.numChannels;
  if (flatIndex < uniforms.size) {
    let coords = getCoordsFromIndex(flatIndex);
    let values = ${e};
    for (var i = 0; i < uniforms.numChannels; i = i + 1) {
      result[flatIndex + i] = i32(floor(255.0 * values[i]));
    }
  }
}
`}};var AL={kernelName:Zi,backendName:"webgpu",kernelFunc:Poe},Ac,kI=O().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU"),lx=new Map;function Poe(r){let{inputs:e,backend:t,attrs:o}=r,{pixels:n}=e,{numChannels:s}=o;if(n==null)throw new Error("pixels passed to tf.browser.fromPixels() can not be null");let a=typeof HTMLVideoElement!="undefined"&&n instanceof HTMLVideoElement,i=typeof HTMLImageElement!="undefined"&&n instanceof HTMLImageElement,p=typeof HTMLCanvasElement!="undefined"&&n instanceof HTMLCanvasElement||typeof OffscreenCanvas!="undefined"&&n instanceof OffscreenCanvas,u=typeof ImageBitmap!="undefined"&&n instanceof ImageBitmap,[c,l]=a?[n.videoWidth,n.videoHeight]:[n.width,n.height],m=[l,c,s],d=!1,f=a||i;if(u||p||f){let b;if(d){let D=n;if(!lx.has(D)||lx.get(D).expired){let P={source:D};lx.set(D,t.device.importExternalTexture(P))}b={width:c,height:l,format:null,usage:null,texture:lx.get(D)}}else{if(f){let L=O().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");(Ac==null||L!==kI)&&(kI=L,Ac=document.createElement("canvas").getContext("2d",{willReadFrequently:kI})),Ac.canvas.width=c,Ac.canvas.height=l,Ac.drawImage(n,0,0,c,l),n=Ac.canvas}let D=GPUTextureUsage.COPY_DST|GPUTextureUsage.RENDER_ATTACHMENT|GPUTextureUsage.TEXTURE_BINDING,P="rgba8unorm",M=t.textureManager.acquireTexture(m[1],m[0],P,D);t.queue.copyExternalImageToTexture({source:n},{texture:M},[m[1],m[0]]),b={width:c,height:l,format:P,usage:D,texture:M}}let C=y.sizeFromShape(m),w=y.computeStrides(m),k=new cx(m,s,d),_=[{type:"uint32",data:[C]},{type:"uint32",data:[s]},{type:"uint32",data:[...w]}],E=t.makeTensorInfo([l,c],"int32"),A=t.tensorMap.get(E.dataId);A.resourceInfo=b;let R=t.runWebGPUProgram(k,[E],"int32",_);return t.disposeData(E.dataId),R}let h=n.data,g=h;if(s!=null&&s!==4){g=new Uint8Array(n.width*n.height*s);let b=h.length,C=0;for(let w=0;w<b;w++)w%4<s&&(g[C++]=h[w])}let x=t.makeTensorInfo(m,"int32",new Int32Array(g));return t.uploadToGPU(x.dataId),x}var 
mx=class{constructor(e,t,o,n,s){this.uniforms="varianceEpsilon : f32,",this.workgroupSize=[128,1,1],this.size=!0,this.variableNames=["x","mean","variance"],S.assertAndGetBroadcastShape(e,t),S.assertAndGetBroadcastShape(e,o),this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),n!=null&&(S.assertAndGetBroadcastShape(e,n),this.variableNames.push("offset")),s!=null&&(S.assertAndGetBroadcastShape(e,s),this.variableNames.push("scale")),this.offsetShape=n,this.scaleShape=s,this.shaderKey="batchNorm"}getUserCode(){let e="0.0";this.offsetShape!=null&&(e="getOffsetByOutputIndex(index)");let t="1.0";return this.scaleShape!=null&&(t="getScaleByOutputIndex(index)"),`
// Entry point: out = (x - mean) * inv + offset, where
// inv = scale * inverseSqrt(variance + varianceEpsilon). The dot product below
// evaluates x*inv - mean*inv + offset in one expression. Offset and scale
// fall back to 0.0 and 1.0 when the program was built without those inputs.
${Q("index")} {
  if (index < uniforms.size)
  {
    let xValue = getXByOutputIndex(index);
    let meanValue = getMeanByOutputIndex(index);
    let varianValue = getVarianceByOutputIndex(index);
    let offsetValue = ${e};
    let scaleValue = ${t};
    let inv = scaleValue * inverseSqrt(varianValue + f32(uniforms.varianceEpsilon));
    setOutputAtIndex(index,dot(vec3<f32>(xValue, -meanValue, offsetValue), vec3<f32>(inv, inv, 1.0)));
  }
}
`}};var RL={kernelName:sn,backendName:"webgpu",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o,scale:n,offset:s,mean:a,variance:i}=r,{varianceEpsilon:p}=e,u=t,c=[o,a,i],l=null;s!=null&&(l=s.shape,c.push(s));let m=null;n!=null&&(m=n.shape,c.push(n));let d=new mx(o.shape,a.shape,i.shape,l,m),f=[{type:"float32",data:[p]}];return u.runWebGPUProgram(d,c,o.dtype,f)}};function Moe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dataFormat:c,dilations:l,dimRoundingMode:m,activation:d,leakyreluAlpha:f}=o,h=S.convertConv2DDataFormat(c),g=S.computeConv2DInfo(n.shape,s.shape,p,l,u,m,!1,h);return Qg({x:n,filter:s,convInfo:g,backend:t,bias:a,preluActivationWeights:i,leakyreluAlpha:f,activation:d})}var DL={kernelName:go,backendName:"webgpu",kernelFunc:Moe};function Loe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,filter:s,bias:a,preluActivationWeights:i}=e,{strides:p,pad:u,dilations:c,dimRoundingMode:l,activation:m,leakyreluAlpha:d}=o,f=c;f==null&&(f=[1,1]),y.assert(S.eitherStridesOrDilationsAreOne(p,f),()=>`Error in depthwiseConv2d: Either strides or dilations must be 1. 
Got strides ${p} and dilations '${f}'`);let h=S.computeConv2DInfo(n.shape,s.shape,p,f,u,l,!0),g=[n,s],x=a!=null,b=i!=null;x&&g.push(a),b&&g.push(i);let C=[{type:"int32",data:[h.padInfo.top,h.padInfo.left]},{type:"int32",data:[h.inHeight,h.inWidth]}],w;return h.outHeight>4&&h.outWidth>4&&h.strideWidth<=2&&h.inChannels===h.outChannels&&h.dilationHeight===1&&h.dilationWidth===1&&h.inChannels%4===0?w=new $c(h,x,m,b):(w=new Ec(h,x,m,b),C.push({type:"int32",data:[h.filterHeight]},{type:"int32",data:[h.filterWidth]},{type:"int32",data:[h.strideHeight,h.strideWidth]},{type:"int32",data:[h.dilationHeight,h.dilationWidth]})),m==="leakyrelu"&&(C.push({type:"float32",data:[d]}),w.uniforms+=" alpha : f32,"),t.runWebGPUProgram(w,g,"float32",C)}var FL={kernelName:xo,backendName:"webgpu",kernelFunc:Loe};var dx=class{constructor(e,t){this.variableNames=["A","indices"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey=`gathernd_${e}`,this.sliceDim=e,this.uniforms=`sliceDim : i32, strides : ${Rt(e)},`}getUserCode(){let e;return this.sliceDim>1?e="uniforms.strides[j]":e="uniforms.strides",`
// Entry point: for output row coords[0], folds the sliceDim index components
// into one flattened row offset (sum of index * stride), then reads column
// coords[1] of that row from A.
${Q("index")} {
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    var flattenIndex = 0;
    for (var j = 0; j < uniforms.sliceDim; j = j + 1) {
      let indexTemp = i32(round(getIndices(coords[0], j)));
      let strideNum = ${e};
      flattenIndex = flattenIndex + indexTemp * strideNum;
    }
    setOutputAtIndex(index, getA(flattenIndex, coords[1]));
  }
}
`}};function Boe(r){let{inputs:e,backend:t}=r,{params:o,indices:n}=e,s=n.shape,a=s[s.length-1],i=y.sizeFromShape(o.shape),[p,u,c,l]=S.prepareAndValidate(o,n),m=me({inputs:{x:n},backend:t,attrs:{shape:[u,a]}}),d=me({inputs:{x:o},backend:t,attrs:{shape:[y.sizeFromShape(o.shape)/c,c]}});if(t.shouldExecuteOnCPU([o,n])||o.dtype==="string"){let b=t.readSync(n.dataId),C=t.bufferSync(o),w=tM(b,C,o.dtype,u,a,c,l,o.shape,i);return t.makeTensorInfo(p,o.dtype,w.values)}let f=new dx(a,[u,c]),h=[{type:"int32",data:[a]},{type:"int32",data:l}],g=t.runWebGPUProgram(f,[d,m],d.dtype,h),x=me({inputs:{x:g},backend:t,attrs:{shape:p}});return t.disposeData(m.dataId),t.disposeData(d.dataId),t.disposeData(g.dataId),x}var OL={kernelName:an,backendName:"webgpu",kernelFunc:Boe};var fx=class{constructor(e,t){this.variableNames=["A","indices"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e.slice(),this.aShape=e,this.outputShape=t,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="gather"}getUserCode(){let e=Voe(this.aShape);return`
// Entry point: replaces coordinate 2 of the output position with the looked-up
// index and reads A there. An index outside [0, aShape[2]) multiplies the
// fetched value by 0.0 instead of branching.
${Q("index")} {
  if (index < uniforms.size) {
    let resRC = getCoordsFromIndex(index);
    let indexZ = i32(getIndices(resRC.x, resRC.z));
    let inBounds = select(0.0, 1.0, indexZ >= 0 && indexZ < uniforms.aShape[2]);
    setOutputAtIndex(index, inBounds * getA(${e}));
  }
}
`}};function Voe(r){let e=["resRC.x","resRC.y","resRC.z","resRC.w"],t=[];for(let o=0;o<r.length;o++)o===2?t.push("indexZ"):t.push(`${e[o]}`);return t.join()}function NI(r){let{inputs:e,backend:t,attrs:o}=r,{x:n,indices:s}=e,{axis:a,batchDims:i}=o,p=y.parseAxisParam(a,n.shape)[0],u=S.segment_util.collectGatherOpShapeInfo(n,s,p,i),c=y.sizeFromShape(s.shape),l=[],m=me({inputs:{x:n},backend:t,attrs:{shape:[u.batchSize,u.outerSize,u.dimSize,u.sliceSize]}}),d=me({inputs:{x:s},backend:t,attrs:{shape:[u.batchSize,c/u.batchSize]}});l.push(m),l.push(d);let f=[u.batchSize,u.outerSize,c/u.batchSize,u.sliceSize];if(t.shouldExecuteOnCPU([n,s])){let C=t.tensorMap.get(d.dataId).values,w=le(d.shape,d.dtype,C),_=t.tensorMap.get(m.dataId).values,E=le(m.shape,m.dtype,_),A=rM(E,w,f);return l.forEach(R=>t.disposeData(R.dataId)),t.makeTensorInfo(u.outputShape,A.dtype,A.values)}let h=new fx(m.shape,f),g=t.runWebGPUProgram(h,[m,d],m.dtype);l.push(g);let x=me({inputs:{x:g},backend:t,attrs:{shape:u.outputShape}});return l.forEach(b=>t.disposeData(b.dataId)),x}var PL={kernelName:Is,backendName:"webgpu",kernelFunc:NI};var zoe=Ze({opType:ge.GREATER,cpuKernelImpl:nM,dtype:"bool"}),ML={kernelName:un,backendName:"webgpu",kernelFunc:zoe};var Woe=Ze({opType:ge.GREATER_EQUAL,dtype:"bool",cpuKernelImpl:oM}),LL={kernelName:pn,backendName:"webgpu",kernelFunc:Woe};function Uoe(r){let{inputs:e,backend:t}=r,{input:o}=e;return ux(o,!0,t)}var BL={kernelName:pi,backendName:"webgpu",kernelFunc:Uoe};var Goe=xe({opType:X.IS_FINITE,dtype:"bool"}),VL={kernelName:ha,backendName:"webgpu",kernelFunc:Goe};var Hoe=xe({opType:X.IS_INF,dtype:"bool"}),zL={kernelName:ga,backendName:"webgpu",kernelFunc:Hoe};var Koe=xe({opType:X.IS_NAN,dtype:"bool"}),WL={kernelName:cn,backendName:"webgpu",kernelFunc:Koe};function qoe(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{alpha:s}=o,a=[{type:"float32",data:[s]}],i=new Hr(n.shape,X.LEAKYRELU,"alpha : f32,");return t.runWebGPUProgram(i,[n],"float32",a)}var 
UL={kernelName:ln,backendName:"webgpu",kernelFunc:qoe};var joe=Ze({opType:ge.LESS,dtype:"bool",cpuKernelImpl:aM}),GL={kernelName:mn,backendName:"webgpu",kernelFunc:joe};var Xoe=Ze({opType:ge.LESS_EQUAL,dtype:"bool",cpuKernelImpl:sM}),HL={kernelName:dn,backendName:"webgpu",kernelFunc:Xoe};var hx=class{constructor(e){this.variableNames=[],this.outputShape=[],this.uniforms="start : f32, step : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[e],this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="linSpace"}getUserCode(){return`
// Entry point: output element index receives start + index * step, both taken
// from uniforms.
${Q("index")} {
if (index < uniforms.size) {
setOutputAtIndex(index, uniforms.start + f32(index) * uniforms.step);
}
}
`}};function Yoe(r){let{backend:e,attrs:t}=r,{start:o,stop:n,num:s}=t,a=(n-o)/(s-1),i=new hx(s),p=[{type:"float32",data:[o]},{type:"float32",data:[a]}];return e.runWebGPUProgram(i,[],"float32",p)}var KL={kernelName:li,backendName:"webgpu",kernelFunc:Yoe};var Qoe=xe({opType:X.LOG,cpuKernelImpl:iM}),qL={kernelName:fn,backendName:"webgpu",kernelFunc:Qoe};var Zoe=xe({opType:X.LOG1P}),jL={kernelName:xa,backendName:"webgpu",kernelFunc:Zoe};var Joe=Ze({opType:ge.LOGICAL_AND,dtype:"bool"}),XL={kernelName:hn,backendName:"webgpu",kernelFunc:Joe};var ene=xe({opType:X.LOGICAL_NOT}),YL={kernelName:gn,backendName:"webgpu",kernelFunc:ene};var tne=Ze({opType:ge.LOGICAL_OR}),QL={kernelName:xn,backendName:"webgpu",kernelFunc:tne};var ZL=`
// Shared LRN snippet: expects a variable named sum in scope and defines
// powValue = basis^(-beta) with basis = bias + alpha * sum.
// beta == 0.5 and beta == 1.0 use inverseSqrt / reciprocal directly; any other
// beta goes through exp(log(basis) * -beta).
var powValue = 0.0;
let basis = uniforms.bias + uniforms.alpha * sum;
if (uniforms.beta == 0.5) {
powValue = inverseSqrt(basis);
} else if (uniforms.beta == 1.0) {
powValue = 1.0 / basis;
} else {
powValue = exp(log(basis) * (-uniforms.beta));
}
`,gx=class{constructor(e){this.outputShape=[],this.variableNames=["x"],this.uniforms="radius : i32, bias : f32, alpha : f32, beta : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="lrn"}getUserCode(){return`
// Entry point: for each element, sums the squares of the values along the last
// axis within +/- radius of its own position (positions outside the axis are
// skipped), then scales the element by powValue from the shared snippet.
${Q("index")} {
if (index < uniforms.size) {
let coords = getOutputCoords();
let b = coords[0];
let r = coords[1];
let c = coords[2];
let d = coords[3];
let x = getX(b, r, c, d);
var sum = 0.0;
for (var i = -uniforms.radius; i <= uniforms.radius; i = i + 1) {
let idx = d + i;
if (idx >= 0 && idx < uniforms.xShape[3]) {
let z = getX(b, r, c, idx);
sum = sum + z * z;
}
}
${ZL}
setOutputAtIndex(index, x * powValue);
}
}
`}},xx=class{constructor(e,t){this.outputShape=[],this.variableNames=["x"],this.uniforms="radius : i32, bias : f32, alpha : f32, beta : f32,",this.workgroupSize=[256,1,1],this.maxAllowRadius=16,y.assert(t<=this.maxAllowRadius,()=>`Radius must be less than or equal to ${this.maxAllowRadius}, current radius is ${t}`),this.outputShape=e,this.elementsPerWorkgroup=this.workgroupSize[0]-2*this.maxAllowRadius,this.dispatchLayout={x:[3],y:[2],z:[0,1]},this.dispatch=Y(this.dispatchLayout,this.outputShape,[this.elementsPerWorkgroup,this.workgroupSize[1],this.workgroupSize[2]]),this.shaderKey="lrn_shared"}getUserCode(){return`
// Workgroup-shared tile of input values along the depth axis. Slot k holds the
// value at depth (workgroupDepth + k - maxAllowRadius), or 0.0 when that depth
// is outside the input.
var <workgroup>lrnSub: array<f32, ${this.workgroupSize[0]}>;
const elementsPerWorkgroup = ${this.elementsPerWorkgroup};
const maxAllowRadius = ${this.maxAllowRadius};
${Q()} {
let localDepth = i32(localId.x);
let workgroupDepth = i32(workgroupId.x) * elementsPerWorkgroup;
// Depth this invocation loads into the tile (shifted left by the halo width).
let xDepth = workgroupDepth + localDepth - maxAllowRadius;
// globalId.z carries both leading output coordinates; split it into b and r.
let b = i32(globalId.z) / uniforms.xShape[1];
let r = i32(globalId.z) - b * uniforms.xShape[1];
let c = i32(globalId.y);
// Depth this invocation writes, if it is one of the producing invocations.
let d = workgroupDepth + localDepth;
var x = 0.0;
if (xDepth >= 0 && xDepth < uniforms.xShape[3]) {
x = getX(b, r, c, xDepth);
}
lrnSub[localDepth] = x;
// Every load must be visible before any invocation reads its neighbours.
workgroupBarrier();
// Only the first elementsPerWorkgroup invocations produce output; the rest
// exist to load the halo.
if (localDepth < elementsPerWorkgroup && d < uniforms.outShape[3]) {
var sum = 0.0;
// Tile slot that holds depth d.
let index = localDepth + maxAllowRadius;
for (var i = -uniforms.radius; i <= uniforms.radius; i = i + 1) {
let z = lrnSub[index + i];
sum = sum + z * z;
}
${ZL}
setOutputAtCoords(b, r, c, d, lrnSub[index] * powValue);
}
} `}};function rne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{depthRadius:s,bias:a,alpha:i,beta:p}=o,u;s>16?u=new gx(n.shape):u=new xx(n.shape,s);let c=[{type:"int32",data:[s]},{type:"float32",data:[a]},{type:"float32",data:[i]},{type:"float32",data:[p]}];return t.runWebGPUProgram(u,[n],n.dtype,c)}var JL={kernelName:mi,backendName:"webgpu",kernelFunc:rne};var one=Ze({opType:ge.MAX,cpuKernelImpl:pM}),eB={kernelName:bn,backendName:"webgpu",kernelFunc:one};function nne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{filterSize:s,strides:a,pad:i,dimRoundingMode:p}=o,u=1,c=S.computePool2DInfo(n.shape,s,a,u,i,p);return zg(n,c,"max",t)}var tB={kernelName:Cn,backendName:"webgpu",kernelFunc:nne};function sne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return Kr(n,s,a,"min",t)}var rB={kernelName:wn,backendName:"webgpu",kernelFunc:sne};var ane=Ze({opType:ge.MIN,cpuKernelImpl:cM}),oB={kernelName:In,backendName:"webgpu",kernelFunc:ane};var yx=class{constructor(e,t,o){this.uniforms="",this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.map((n,s)=>n[0]+e[s]+n[1]),this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.xShape=e,t.map((n,s)=>{this.uniforms+=` pad${s} : vec2<i32>,`}),this.offset=o==="reflect"?0:1,this.shaderKey=`mirrorPad_${o}`}getUserCode(){let e=this.xShape.length,t=this.xShape.map((u,c)=>`uniforms.pad${c}[0]`).join(","),o=this.xShape.map((u,c)=>`uniforms.pad${c}[0] + uniforms.xShape${e>1?`[${c}]`:""}`).join(","),n=e===1?"start":"start[i]",s=e===1?"end":"end[i]",a=e===1?"outC":"outC[i]",i=Rt(e),p=e>1?["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,e):"coords";return`
// Entry point: maps each output coordinate back into the input by reflecting
// coordinates that fall before start or at/after end, then reads x at the
// reflected position relative to start. The interpolated offset distinguishes
// the two padding modes.
${Q("index")} {
  if (index < uniforms.size) {
    let start = ${i}(${t});
    let end = ${i}(${o});
    var outC = getCoordsFromIndex(index);
    for (var i = 0; i < ${e}; i = i + 1) {
      if (${a} < ${n}) {
        ${a} = ${n} * 2 - ${a} - ${this.offset};
      } else if(${a} >= ${s}) {
        ${a} = (${s} - 1) * 2 - ${a} + ${this.offset};
      }
    }
    let coords = outC - start;
    setOutputAtIndex(index, getX(${p}));
  }
}
`}};var nB={kernelName:vn,backendName:"webgpu",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{x:o}=r,{paddings:n,mode:s}=e,a=t,i=n.map(c=>({type:"int32",data:[c[0],c[1]]})),p=new yx(o.shape,n,s);return a.runWebGPUProgram(p,[o],o.dtype,i)}};var ine=Ze({opType:ge.MOD}),sB={kernelName:ya,backendName:"webgpu",kernelFunc:ine};function une(r){let{inputs:e,backend:t}=r,{x:o}=e;if(t.shouldExecuteOnCPU([o])){let s=t.tensorMap.get(o.dataId),[a,i]=mM(s.values,o.shape,o.dtype);return t.makeTensorInfo(i,o.dtype,a)}let n=new Hr(o.shape,X.NEG);return t.runWebGPUProgram(n,[o],o.dtype)}var aB={kernelName:vs,backendName:"webgpu",kernelFunc:une};function pne(r){console.warn("tf.nonMaxSuppression() in webgpu locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");let{inputs:e,backend:t,attrs:o}=r,{boxes:n,scores:s}=e,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p}=o,u=t.readSync(n.dataId),c=t.readSync(s.dataId),{selectedIndices:l}=Vt.nonMaxSuppressionV3Impl(u,c,a,i,p);return t.makeTensorInfo([l.length],"int32",new Int32Array(l))}var iB={kernelName:Tn,backendName:"webgpu",kernelFunc:pne};function cne(r){console.warn("tf.nonMaxSuppression() in webgpu locks the UI thread. Call tf.nonMaxSuppressionAsync() instead");let{inputs:e,backend:t,attrs:o}=r,{boxes:n,scores:s}=e,{maxOutputSize:a,iouThreshold:i,scoreThreshold:p,softNmsSigma:u}=o,c=t.readSync(n.dataId),l=t.readSync(s.dataId),m=a,d=i,f=p,h=u,{selectedIndices:g,selectedScores:x}=Vt.nonMaxSuppressionV5Impl(c,l,m,d,f,h);return[t.makeTensorInfo([g.length],"int32",new Int32Array(g)),t.makeTensorInfo([x.length],"float32",new Float32Array(x))]}var uB={kernelName:_n,backendName:"webgpu",kernelFunc:cne};var bx=class{constructor(e,t){this.variableNames=["x"],this.uniforms="onValue : f32, offValue : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[e,t],this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="onehot"}getUserCode(){return`
// Entry point: output (row, col) is onValue when the rounded input value for
// that row equals col, otherwise offValue (mix with a 0.0 / 1.0 selector).
${Q("index")} {
  if(index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    setOutputAtIndex(index, mix(uniforms.offValue, uniforms.onValue,
                                f32(i32(round(getX(coords.x))) == coords.y)));
  }
}
`}};function lne(r){let{inputs:e,backend:t,attrs:o}=r,{indices:n}=e,{dtype:s,depth:a,onValue:i,offValue:p}=o,u=y.sizeFromShape(n.shape),c=new bx(u,a),l=me({inputs:{x:n},backend:t,attrs:{shape:[u]}}),m=[{type:"float32",data:[i]},{type:"float32",data:[p]}],d=t.runWebGPUProgram(c,[l],s,m);t.disposeData(l.dataId);let f=[...n.shape,a],h=me({inputs:{x:d},backend:t,attrs:{shape:f}});return t.disposeData(d.dataId),h}var pB={kernelName:$n,backendName:"webgpu",kernelFunc:lne};function Xl(r){let{inputs:e,backend:t}=r,{x:o}=e;if(o.dtype==="complex64"){let n=Xa({inputs:{input:o},backend:t}),s=Xl({inputs:{x:n},backend:t}),a=Qu({inputs:{input:o},backend:t}),i=Xl({inputs:{x:a},backend:t}),p=po({inputs:{real:s,imag:i},backend:t});return t.disposeData(n.dataId),t.disposeData(s.dataId),t.disposeData(a.dataId),t.disposeData(i.dataId),p}else return fr({attrs:{shape:o.shape,dtype:o.dtype,value:o.dtype==="string"?"":0},backend:t})}var cB={kernelName:Os,backendName:"webgpu",kernelFunc:Xl};function lB(r){let{inputs:e,backend:t}=r,{x:o}=e;if(o.dtype==="string")throw new Error("onesLike is not supported under string dtype");if(o.dtype==="complex64"){let n=Xa({inputs:{input:o},backend:t}),s=lB({inputs:{x:n},backend:t}),a=Qu({inputs:{input:o},backend:t}),i=Xl({inputs:{x:a},backend:t}),p=po({inputs:{real:s,imag:i},backend:t});return t.disposeData(n.dataId),t.disposeData(s.dataId),t.disposeData(a.dataId),t.disposeData(i.dataId),p}else return fr({attrs:{shape:o.shape,dtype:o.dtype,value:1},backend:t})}var mB={kernelName:ks,backendName:"webgpu",kernelFunc:lB};function mne(r){let{inputs:e,backend:t,attrs:o}=r,{axis:n}=o;if(e.length===1)return ix({inputs:{input:e[0]},backend:t,attrs:{dim:n}});let s=e[0].shape,a=e[0].dtype;e.forEach(c=>{y.assertShapesMatch(s,c.shape,"All tensors passed to stack must have matching shapes"),y.assert(a===c.dtype,()=>"All tensors passed to stack must have matching dtypes")});let i=[],p=e.map(c=>{let l=ix({inputs:{input:c},backend:t,attrs:{dim:n}});return 
i.push(l),l}),u=wI({inputs:p,backend:t,attrs:{axis:n}});return i.forEach(c=>t.disposeData(c.dataId)),u}var dB={kernelName:Ns,backendName:"webgpu",kernelFunc:mne};var Cx=class{constructor(e,t){this.variableNames=["x"],this.uniforms="constantValue : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t.map((o,n)=>o[0]+e[n]+o[1]),this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),t.map((o,n)=>{this.uniforms+=` pad${n} : vec2<i32>,`}),this.xShape=e,this.shaderKey="pad"}getUserCode(){let e=this.xShape.length,t=Rt(e),o=this.xShape.map((l,m)=>`uniforms.pad${m}[0]`).join(","),n=this.xShape.map((l,m)=>`uniforms.pad${m}[0] + uniforms.xShape${e>1?`[${m}]`:""}`).join(","),s=e>1?`${t}(${o})`:`${o}`,a=e>1?`${t}(${n})`:`${n}`,i=e>1?"any(outC < start)":"outC < start",p=e>1?"any(outC >= end)":"outC >= end",u=e>1?["coords[0]","coords[1]","coords[2]","coords[3]"].slice(0,e):"coords";return`
// Entry point: output positions before start or at/after end receive
// constantValue; every other position copies x at (position - start).
${Q("index")} {
  if (index < uniforms.size) {
    let start = ${s};
    let end = ${a};
    let outC = getCoordsFromIndex(index);
    if (${i} || ${p}) {
      setOutputAtIndex(index, uniforms.constantValue);
    } else {
      let coords = outC - start;
      setOutputAtIndex(index, getX(${u}));
    }
  }
}
`}};var TI=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{paddings:s,constantValue:a}=o;if(s.every(u=>y.arraysEqual(u,[0,0])))return Dt({inputs:{x:n},backend:t});if(y.sizeFromShape(n.shape)===0){let u=s.map((c,l)=>c[0]+n.shape[l]+c[1]);return fr({backend:t,attrs:{shape:u,value:a,dtype:n.dtype}})}let i=[{type:"float32",data:[a]}];s.map(u=>i.push({type:"int32",data:[u[0],u[1]]}));let p=new Cx(n.shape,s);return t.runWebGPUProgram(p,[n],n.dtype,i)},fB={kernelName:En,backendName:"webgpu",kernelFunc:TI};var dne=Ze({opType:ge.POW}),hB={kernelName:An,backendName:"webgpu",kernelFunc:dne};function fne(r){let{inputs:e,backend:t}=r,{x:o,alpha:n}=e,s=new Yu(ge.PRELU,o.shape,n.shape);return t.runWebGPUProgram(s,[o,n],"float32")}var gB={kernelName:Rn,backendName:"webgpu",kernelFunc:fne};function hne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{axis:s,keepDims:a}=o;return Kr(n,s,a,"prod",t)}var xB={kernelName:Dn,backendName:"webgpu",kernelFunc:hne};var gne=r=>{let{backend:e,attrs:t}=r,{start:o,stop:n,step:s,dtype:a}=t,i=hM(o,n,s,a);return e.makeTensorInfo([i.length],a,i)},yB={kernelName:Ts,backendName:"webgpu",kernelFunc:gne};var _I=Ze({opType:ge.DIV}),bB={kernelName:Zo,backendName:"webgpu",kernelFunc:_I};var xne=xe({opType:X.RECIPROCAL}),CB={kernelName:Fn,backendName:"webgpu",kernelFunc:xne};var yne=xe({opType:X.RELU}),SB={kernelName:On,backendName:"webgpu",kernelFunc:yne};var bne=xe({opType:X.RELU6}),wB={kernelName:Ln,backendName:"webgpu",kernelFunc:bne};var Sx=class{constructor(e,t,o){this.variableNames=["x"],this.uniforms="adjustHeightWidth : vec2<f32>, halfPixelCenters : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[e[0],t,o,e[3]],this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="resizeBilinear"}getUserCode(){return`
// Entry point: bilinear resize over axes 1 and 2. The fractional source
// position is derived from the in/out size ratio (both sizes reduced by
// adjustHeightWidth) with the halfPixelCenters shift applied, and the four
// surrounding samples are blended by the fractional part.
${Q("index")} {
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    let b = coords[0];
    let d = coords[3];
    let rc = coords.yz;

    let effectiveInSize = vec2<f32>(
        f32(uniforms.xShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.xShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveOutSize = vec2<f32>(
        f32(uniforms.outShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.outShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveInputOverOutputRatioRC =
        effectiveInSize / effectiveOutSize;

    // Fractional source index
    let sourceFracIndexRC =
        (vec2<f32>(rc) + vec2<f32>(uniforms.halfPixelCenters)) *
        effectiveInputOverOutputRatioRC - vec2<f32>(uniforms.halfPixelCenters);

    // Compute the four integer indices.
    let sourceFloorRC = vec2<i32>(sourceFracIndexRC);
    let sourceCeilRC = vec2<i32>(
        min(vec2<f32>(uniforms.xShape.yz) - vec2<f32>(1.0), ceil(sourceFracIndexRC)));

    let topLeft = getX(b, sourceFloorRC.x, sourceFloorRC.y, d);
    let bottomLeft = getX(b, sourceCeilRC.x, sourceFloorRC.y, d);
    let topRight = getX(b, sourceFloorRC.x, sourceCeilRC.y, d);
    let bottomRight = getX(b, sourceCeilRC.x, sourceCeilRC.y, d);

    let fracRC = sourceFracIndexRC - vec2<f32>(sourceFloorRC);

    let top = topLeft + (topRight - topLeft) * fracRC.y;
    let bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y;
    let newValue = top + (bottom - top) * fracRC.x;

    setOutputAtIndex(index, newValue);
  }
}
`}};function Cne(r){let{inputs:e,backend:t,attrs:o}=r,{images:n}=e,{alignCorners:s,size:a,halfPixelCenters:i}=o,[p,u]=a,c=s&&p>1?1:0,l=s&&u>1?1:0,d=[{type:"float32",data:[c,l]},{type:"float32",data:[i?.5:0]}],f=new Sx(n.shape,p,u);return t.runWebGPUProgram(f,[n],"float32",d)}var IB={kernelName:Mn,backendName:"webgpu",kernelFunc:Cne};var wx=class{constructor(e,t,o,n){this.variableNames=["x"],this.uniforms="adjustHeightWidth : vec2<f32>, roundBase : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=[e[0],t,o,e[3]],this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.halfPixelCenters=n,this.shaderKey=`resizeNearest_${n}`}getUserCode(){let e;return this.halfPixelCenters?e="max((vec2<f32>(rc) + vec2<f32>(0.5)) * effectiveInputOverOutputRatioRC, vec2<f32>(0.0))":e="vec2<f32>(rc) * effectiveInputOverOutputRatioRC",`
// Entry point: nearest-neighbour resize over axes 1 and 2. The source position
// expression is interpolated at build time; roundBase is added before flooring
// and the result is clamped to the last valid row/column.
${Q("index")} {
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    let b = coords[0];
    let d = coords[3];
    let rc = coords.yz;

    let effectiveInSize = vec2<f32>(
        f32(uniforms.xShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.xShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveOutSize = vec2<f32>(
        f32(uniforms.outShape.y) - uniforms.adjustHeightWidth[0],
        f32(uniforms.outShape.z) - uniforms.adjustHeightWidth[1]);

    let effectiveInputOverOutputRatioRC =
        effectiveInSize / effectiveOutSize;

    // Fractional source index
    let sourceFracIndexRC = ${e};

    // Compute the coordinators of nearest neighbor point.
    let inputShapeRC = vec2<f32>(f32(uniforms.xShape.y), f32(uniforms.xShape.z));
    let sourceNearestRC = vec2<i32>(
        min(inputShapeRC - 1.0, floor(sourceFracIndexRC + uniforms.roundBase)));
    let newValue = getX(b, sourceNearestRC.x, sourceNearestRC.y, d);

    setOutputAtIndex(index, newValue);
  }
}
`}};function Sne(r){let{inputs:e,backend:t,attrs:o}=r,{images:n}=e,{alignCorners:s,halfPixelCenters:a,size:i}=o,[p,u]=i,c=s&&p>1?1:0,l=s&&u>1?1:0,d=[{type:"float32",data:[c,l]},{type:"float32",data:[s?.5:0]}],f=new wx(n.shape,p,u,a);return t.runWebGPUProgram(f,[n],n.dtype,d)}var vB={kernelName:Pn,backendName:"webgpu",kernelFunc:Sne};var Ix=class{constructor(e){this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms=" axis : vec4<i32>,",this.shaderKey="reverse"}getUserCode(){return`
// Returns coords with every axis whose uniforms.axis flag equals 1 mirrored
// (shape - coord - 1); unflagged axes are passed through.
// Using uniform variables as judging conditions, so the function has
// coherent execution within all threads.
fn getReverseCoords(coords : vec4<i32>) -> vec4<i32> {
  var reverseCoords = coords;
  if (uniforms.axis[0] == 1) {
    reverseCoords[0] = uniforms.xShape[0] - coords[0] - 1;
  }
  if (uniforms.axis[1] == 1) {
    reverseCoords[1] = uniforms.xShape[1] - coords[1] - 1;
  }
  if (uniforms.axis[2] == 1) {
    reverseCoords[2] = uniforms.xShape[2] - coords[2] - 1;
  }
  if (uniforms.axis[3] == 1) {
    reverseCoords[3] = uniforms.xShape[3] - coords[3] - 1;
  }

  return reverseCoords;
}
// Entry point: each output element copies x from its mirrored coordinates.
${Q("index")} {
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    let reverseCoords = getReverseCoords(coords);
    setOutputAtIndex(index, getX(reverseCoords[0],
        reverseCoords[1], reverseCoords[2], reverseCoords[3]));
  }
}
`}};function wne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{dims:s}=o,a=n.shape.length;if(a===0)return Dt({inputs:{x:n},backend:t});let i=n.shape,p=[1,1,1,1];i.forEach((g,x)=>{let b=x+4-a;p[b]=g});let u=y.parseAxisParam(s,n.shape),c=[0,0,0,0];u.forEach(g=>{let x=g+4-a;c[x]=1});let l=[{type:"int32",data:c}],m=me({inputs:{x:n},backend:t,attrs:{shape:p}}),d=new Ix(p),f=t.runWebGPUProgram(d,[m],m.dtype,l);t.disposeData(m.dataId);let h=me({inputs:{x:f},backend:t,attrs:{shape:i}});return t.disposeData(f.dataId),h}var kB={kernelName:Bn,backendName:"webgpu",kernelFunc:wne};var vx=class{constructor(e,t){this.outputShape=[],this.variableNames=["x"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms=`centerX : f32, centerY : f32, sinRadians : f32,
cosRadians : f32,`,
/* A numeric fill value is declared as one f32 uniform; any other fill value is
   declared as a vec3<f32> indexed by coords[3]. The fill snippet and the
   shader key suffix are chosen to match. */
this.shaderKey="rotate",this.outputShape=e,typeof t=="number"?(this.uniforms+=" fillValue : f32,",this.fillSnippet="var outputValue = uniforms.fillValue;",this.shaderKey+="_float"):(this.uniforms+=" fillValue : vec3<f32>,",this.fillSnippet="var outputValue = uniforms.fillValue[coords[3]];",this.shaderKey+="_vec3")}
/* Returns the WGSL source of the rotate program. */
getUserCode(){return`
// Entry point: rotates each output position about (centerX, centerY) using the
// supplied sin/cos, rounds to the nearest source pixel, and copies it when it
// lies inside the image; otherwise the fill value from the snippet is kept.
${Q("index")} {
  if (index < uniforms.size) {
    let coords = getCoordsFromIndex(index);
    let coordXFloat = (f32(coords[2]) - uniforms.centerX) *
        uniforms.cosRadians - (f32(coords[1]) - uniforms.centerY) *
        uniforms.sinRadians;
    let coordYFloat = (f32(coords[2]) - uniforms.centerX) *
        uniforms.sinRadians + (f32(coords[1]) - uniforms.centerY) *
        uniforms.cosRadians;
    let coordX = i32(round(coordXFloat + uniforms.centerX));
    let coordY = i32(round(coordYFloat + uniforms.centerY));
    ${this.fillSnippet}
    if(coordX >= 0 && coordX < uniforms.xShape[2] && coordY >= 0 &&
        coordY < uniforms.xShape[1]) {
      outputValue = getX(coords[0], coordY, coordX, coords[3]);
    }
    setOutputAtIndex(index, outputValue);
  }
}
`}};var NB={kernelName:ts,backendName:"webgpu",kernelFunc:({inputs:r,attrs:e,backend:t})=>{let{image:o}=r,{radians:n,fillValue:s,center:a}=e,i=t,p=new vx(o.shape,s),[u,c]=S.getImageCenter(a,o.shape[1],o.shape[2]),l=[{type:"float32",data:[u]},{type:"float32",data:[c]},{type:"float32",data:[Math.sin(n)]},{type:"float32",data:[Math.cos(n)]}];return typeof s=="number"?l.push({type:"float32",data:[Number.parseFloat(s.toFixed(2))]}):l.push({type:"float32",data:s}),i.runWebGPUProgram(p,[o],o.dtype,l)}};var Ine=xe({opType:X.ROUND}),TB={kernelName:Vn,backendName:"webgpu",kernelFunc:Ine};var vne=xe({opType:X.RSQRT,cpuKernelImpl:gM}),_B={kernelName:zn,backendName:"webgpu",kernelFunc:vne};var Hi=class{constructor(e,t,o,n,s,a,i,p=!0){this.variableNames=["updates","indices"],this.workgroupSize=[64,1,1],this.atomic=!0,this.outputShape=a,this.type=i,this.sumDupeIndices=p,this.dispatchLayout=ae(e),this.dispatch=Y(this.dispatchLayout,e,this.workgroupSize),this.sliceDimGreaterThanOne=t>1,this.shaderKey=`scatter_${o}_${n}_${this.sliceDimGreaterThanOne}_${i}_${p}`;let u=Rt(s.length);this.uniforms=`sliceDim : i32, strides: ${u}, updatesSize: i32,`,this.updatesRank=n,this.indicesRank=o}getUserCode(){let e="";this.indicesRank===1?e="coords[0]":this.indicesRank===2&&(e="coords[0], j");let t=`getIndices(${e})`,o=this.sliceDimGreaterThanOne?"uniforms.strides[j]":"uniforms.strides",n="",s="";this.dispatchLayout.x.length===1?(n="flattenedIndex",s=`
2022-11-18 17:13:29 +01:00
fn getUpdatesCoordsFromFlatIndex(index : i32) -> i32 {
return index;
}
`):this.dispatchLayout.x.length===2&&(n="vec2<i32>(flattenedIndex, coords[1])",s=`
fn getUpdatesCoordsFromFlatIndex(index : i32) -> vec2<i32> {
// N.B. |updates| could be a scalar tensor, conceptually representing a
// 2D tensor with all values equal to that. By design, its size must be
// the same as |outShape[1]| in one dimension, and |indicesShape[0]|
// gives the other.
let sliceSize = uniforms.outShape[1];
let d0 = index / sliceSize;
let d1 = index - d0 * sliceSize;
return vec2<i32>(d0, d1);
}
2023-01-06 19:23:06 +01:00
`);let i=`getUpdates(${Array.from({length:this.updatesRank},(u,c)=>`coords[${c}]`).join(", ")})`;return`
2022-11-18 17:13:29 +01:00
${s}
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-20 22:20:02 +01:00
if (index < uniforms.updatesSize) {
2022-11-18 17:13:29 +01:00
let coords = getUpdatesCoordsFromFlatIndex(index);
var flattenedIndex = 0;
for (var j = 0; j < uniforms.sliceDim; j = j + 1) {
let indexInside = i32(round(${t}));
flattenedIndex = flattenedIndex + indexInside * ${o};
}
let updateValue =
2023-01-06 19:23:06 +01:00
${vc(this.type,!1)}(${i});
2022-11-18 17:13:29 +01:00
let flatIndex = getOutputIndexFromCoords(${n});
2023-01-06 19:23:06 +01:00
${this.sumDupeIndices?Ic("&result[flatIndex]","updateValue",this.type):"atomicStore(&result[flatIndex], bitcast<i32>(updateValue));"}
2022-11-18 17:13:29 +01:00
}
2023-01-06 19:23:06 +01:00
}`}};function kne(r){let{inputs:e,backend:t,attrs:o}=r,{indices:n,updates:s}=e,{shape:a}=o,{sliceRank:i,numUpdates:p,sliceSize:u,strides:c,outputSize:l}=S.calculateShapes(s,n,a),m=[l/u,u];if(l===0)return t.makeTensorInfo(a,n.dtype);let d=me({inputs:{x:n},backend:t,attrs:{shape:[p,i]}}),f=me({inputs:{x:s},backend:t,attrs:{shape:[p,u]}}),h=f.dtype,g=fr({backend:t,attrs:{shape:m,value:0,dtype:h}}),x=y.sizeFromShape(f.shape),b=[{type:"int32",data:[i]},{type:"int32",data:c},{type:"int32",data:[x]}],C=new Hi(f.shape,i,d.shape.length,f.shape.length,c,m,h),w=t.runWebGPUProgram(C,[f,d],h,b,g),k=me({inputs:{x:w},backend:t,attrs:{shape:a}});return t.disposeData(d.dataId),t.disposeData(f.dataId),t.disposeData(w.dataId),k}var $B={kernelName:Wn,backendName:"webgpu",kernelFunc:kne};var kx=class{constructor(e,t){this.outputShape=[],this.variableNames=["sortedSequence","values"],this.uniforms="numInputs : i32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.side=t,this.shaderKey=`search_sorted_${t}`}getUserCode(){return`
2022-11-20 22:20:02 +01:00
fn findBound(batch: i32, value: f32) -> i32 {
var left = i32(0);
var right = uniforms.numInputs;
while (left < right) {
var mid = (left + right) / 2;
if (getSortedSequence(batch, mid) ${this.side==="left"?"<":"<="} value) {
left = mid + 1;
} else {
right = mid;
}
}
return right;
}
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-20 22:20:02 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
let value = getValuesByOutputIndex(index);
setOutputAtIndexI32(index, findBound(coords[0], value));
}
}
2023-01-06 19:23:06 +01:00
`}};function Nne(r){let{inputs:e,backend:t,attrs:o}=r,{sortedSequence:n,values:s}=e,{side:a}=o,i=new kx([s.shape[0],s.shape[1]],a),p=[{type:"int32",data:[n.shape[1]]}];return t.runWebGPUProgram(i,[n,s],"int32",p)}var EB={kernelName:fi,backendName:"webgpu",kernelFunc:Nne};var Nx=class{constructor(e,t,o){this.variableNames=["c","a","b"],this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=t,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.cRank=e,this.rank=o,this.shaderKey="select"}getUserCode(){let e,t;if(this.rank>4)throw Error(`Where for rank ${this.rank} is not yet supported`);if(this.rank===1)t="resRC",e="resRC";else{let n=["resRC.x","resRC.y","resRC.z","resRC.w"],s=[],a=[];for(let i=0;i<this.outputShape.length;i++)a.push(`${n[i]}`),i<this.cRank&&s.push(`${n[i]}`);e=s.join(),t=a.join()}return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let resRC = getCoordsFromIndex(index);
let cVal = getC(${e});
if (cVal >= 1.0) {
setOutputAtIndex(index, getA(${t}));
} else {
setOutputAtIndex(index, getB(${t}));
}
}
}
2023-01-06 19:23:06 +01:00
`}};function Tne(r){let{inputs:e,backend:t}=r,{condition:o,t:n,e:s}=e,a=new Nx(o.shape.length,n.shape,n.shape.length);return t.runWebGPUProgram(a,[o,n,s],dt(n.dtype,s.dtype))}var AB={kernelName:$s,backendName:"webgpu",kernelFunc:Tne};var _ne=xe({opType:X.SELU}),RB={kernelName:Ca,backendName:"webgpu",kernelFunc:_ne};var $ne=xe({opType:X.SIGMOID}),DB={kernelName:Gn,backendName:"webgpu",kernelFunc:$ne};var Ene=xe({opType:X.SIGN}),FB={kernelName:wa,backendName:"webgpu",kernelFunc:Ene};var Ane=xe({opType:X.SIN}),OB={kernelName:Un,backendName:"webgpu",kernelFunc:Ane};var Rne=xe({opType:X.SINH}),PB={kernelName:Sa,backendName:"webgpu",kernelFunc:Rne};var $I=Ze({opType:ge.SUB,cpuKernelImpl:wM,supportsComplex:!0}),MB={kernelName:Yn,backendName:"webgpu",kernelFunc:$I};function Dne(r){let{inputs:e,backend:t,attrs:o}=r,{logits:n}=e,{dim:s}=o,a=y.parseAxisParam([s],n.shape),i=Hl({inputs:{x:n},backend:t,attrs:{reductionIndices:a,keepDims:!1}}),p=S.expandShapeToKeepDim(i.shape,a),u=me({inputs:{x:i},backend:t,attrs:{shape:p}}),c=$I({inputs:{a:n,b:u},backend:t}),l=vI({inputs:{x:c},backend:t}),m=ql({inputs:{x:l},backend:t,attrs:{axis:a,keepDims:!1}}),d=me({inputs:{x:m},backend:t,attrs:{shape:p}}),f=_I({inputs:{a:l,b:d},backend:t});return t.disposeData(i.dataId),t.disposeData(u.dataId),t.disposeData(c.dataId),t.disposeData(l.dataId),t.disposeData(m.dataId),t.disposeData(d.dataId),f}var LB={kernelName:qn,backendName:"webgpu",kernelFunc:Dne};var Fne=xe({opType:X.SOFTPLUS}),BB={kernelName:Ia,backendName:"webgpu",kernelFunc:Fne};var One=r=>{let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{blockShape:s,paddings:a}=o;y.assert(n.shape.length<=4,()=>"spaceToBatchND for rank > 4 with a WebGPU backend not implemented yet");let i=s.reduce((x,b)=>x*b),p=[[0,0]];p.push(...a);for(let x=1+s.length;x<n.shape.length;++x)p.push([0,0]);let 
u=[],c=TI({inputs:{x:n},backend:t,attrs:{paddings:p,constantValue:0}}),l=S.getReshaped(c.shape,s,i,!1),m=S.getPermuted(l.length,s.length,!1),d=S.getReshapedPermuted(c.shape,s,i,!1),f=me({inputs:{x:c},backend:t,attrs:{shape:l}}),h=Nr({inputs:{x:f},backend:t,attrs:{perm:m}}),g=me({inputs:{x:h},backend:t,attrs:{shape:d}});return u.push(c),u.push(f),u.push(h),u.forEach(x=>t.disposeData(x.dataId)),g},VB={kernelName:As,backendName:"webgpu",kernelFunc:One};var Tx=class{constructor(e,t){this.variableNames=["A"],this.workgroupSize=[64,1,1],this.size=!0;let o=new Array(e.length);for(let n=0;n<o.length;n++)o[n]=e[n]*t[n];this.outputShape=o,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.rank=this.outputShape.length,this.shaderKey="tile"}getUserCode(){let e=Pne(this.rank,"uniforms.");return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let resRC = getCoordsFromIndex(index);
setOutputAtIndex(index, getA(${e}));
}
}
2023-01-06 19:23:06 +01:00
`}};function Pne(r,e=""){if(r>=5)throw Error(`Tile for rank ${r} is not yet supported`);if(r===1)return`(resRC % ${e}aShape)`;let t=["resRC.x","resRC.y","resRC.z","resRC.w"],o=[];for(let n=0;n<r;n++)o.push(`(${t[n]} % ${e}aShape[${n}])`);return o.join()}function EI(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{reps:s}=o;if(t.shouldExecuteOnCPU([n])||n.dtype==="string"||n.shape.length>=5){let p=t.readSync(n.dataId),u=n.dtype==="string"?p.map(m=>y.decodeString(m)):p,c=le(n.shape,n.dtype,u),l=IM(c,s);return t.makeTensorInfo(l.shape,l.dtype,l.values)}let a=new Tx(n.shape,s);return t.runWebGPUProgram(a,[n],n.dtype)}var zB={kernelName:to,backendName:"webgpu",kernelFunc:EI};function Mne(r){let{inputs:e,backend:t,attrs:o}=r,{sparseIndices:n,sparseValues:s,defaultValue:a}=e,{outputShape:i}=o,{sliceRank:p,numUpdates:u,sliceSize:c,strides:l,outputSize:m}=S.calculateShapes(s,n,i),d=!1;if(s.dtype==="string"){let A=t.bufferSync(n),R=t.bufferSync(s),D=y.decodeString(t.readSync(a.dataId)[0]),P=xM(A,R,i,m,c,u,p,l,D,d);return t.makeTensorInfo(i,P.dtype,P.values)}let f=[m/c,c],h=me({inputs:{x:n},backend:t,attrs:{shape:[u,p]}}),g=s.shape.length?me({inputs:{x:s},backend:t,attrs:{shape:[u,c]}}):Dt({inputs:{x:s},backend:t}),x=g.dtype,b=t.makeTensorInfo([],x,y.makeZerosTypedArray(1,x)),C=me({inputs:{x:a},backend:t,attrs:{shape:Array(f.length).fill(1)}}),w=EI({inputs:{x:C},backend:t,attrs:{reps:f}}),k=y.sizeFromShape([u,c]),_=[{type:"int32",data:[p]},{type:"int32",data:l},{type:"int32",data:[k]}];switch(u){case 0:break;case 1:{let A=new Hi([u,c],p,h.shape.length,g.shape.length,l,f,x,d);t.runWebGPUProgram(A,[g,h],x,_,w)}break;default:{let A=new Hi([u,c],p,h.shape.length,b.shape.length,l,f,x,d);t.runWebGPUProgram(A,[b,h],x,_,w)}{let A=new Hi([u,c],p,h.shape.length,g.shape.length,l,f,x);t.runWebGPUProgram(A,[g,h],x,_,w)}}let E=me({inputs:{x:w},backend:t,attrs:{shape:i}});return 
t.disposeData(h.dataId),t.disposeData(g.dataId),t.disposeData(C.dataId),t.disposeData(b.dataId),t.disposeData(w.dataId),E}var WB={kernelName:yi,backendName:"webgpu",kernelFunc:Mne};function Lne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{numOrSizeSplits:s,axis:a}=o,i=y.parseAxisParam(a,n.shape)[0],p=S.prepareSplitSize(n,s,i),u=n.shape.length,c=new Array(u).fill(0),l=n.shape.slice();return p.map(m=>{let d=[...l];d[i]=m;let f=fs({inputs:{x:n},backend:t,attrs:{begin:c,size:d}});return c[i]+=m,f})}var UB={kernelName:Rs,backendName:"webgpu",kernelFunc:Lne};var Bne=xe({opType:X.SQRT}),GB={kernelName:Hn,backendName:"webgpu",kernelFunc:Bne};var HB={kernelName:bi,backendName:"webgpu",kernelFunc:({inputs:r,backend:e})=>{let{x:t}=r,o=e,n=new Hr(t.shape,X.SQUARE);return o.runWebGPUProgram(n,[t],t.dtype)}};var Vne=Ze({opType:ge.SQUARED_DIFFERENCE}),KB={kernelName:jn,backendName:"webgpu",kernelFunc:Vne};function zne({inputs:r,attrs:e,backend:t}){let{x:o}=r,n=new Hr(o.shape,X.STEP,"stepAlpha : f32,"),s=[{type:"float32",data:[e.alpha]}];return t.runWebGPUProgram(n,[o],o.dtype,s)}var qB={kernelName:fo,backendName:"webgpu",kernelFunc:zne};var _x=class{constructor(e){this.variableNames=["x"],this.workPerThread=1,this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize,[this.workPerThread,1,1]);let t=Rt(this.outputShape.length);this.uniforms=`begin : ${t}, strides : ${t}, `,this.shaderKey="stridedSlice"}getUserCode(){let e=this.outputShape.length,t="";if(e===1)t="coords * uniforms.strides + uniforms.begin";else{let n=0;t=this.outputShape.map((s,a)=>(n++,this.outputShape.length===1?`coords * uniforms.strides[${a}] + uniforms.begin[${a}]`:`coords[${n-1}] * uniforms.strides[${a}] + uniforms.begin[${a}]`)).join(",")}return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
setOutputAtIndex(index, getX(${t}));
}
}
2023-01-06 19:23:06 +01:00
`}};function Wne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{begin:s,end:a,strides:i,beginMask:p,endMask:u,ellipsisMask:c,newAxisMask:l,shrinkAxisMask:m}=o,{finalShapeSparse:d,finalShape:f,isIdentity:h,sliceDim0:g,isSimpleSlice:x,begin:b,end:C,strides:w}=ut.sliceInfo(n.shape,s,a,i,p,u,c,l,m),k;if(h)k=me({inputs:{x:n},backend:t,attrs:{shape:f}});else if(g||x){y.assert(n.shape.length>=1,()=>`Input must have rank at least 1, got: ${n.shape.length}`);let _=ut.computeOutShape(b,C,w),E=fs({inputs:{x:n},backend:t,attrs:{begin:b,size:_}});k=me({inputs:{x:E},backend:t,attrs:{shape:f}}),t.disposeData(E.dataId)}else if(t.shouldExecuteOnCPU([n])){let E=t.readSync(n.dataId),A=le(n.shape,n.dtype,E),R=CM(d,A,w,b);k=t.makeTensorInfo(f,n.dtype,R.values)}else{let E=new _x(d),A=[{type:"int32",data:b},{type:"int32",data:w}],R=t.runWebGPUProgram(E,[n],n.dtype,A);k=me({inputs:{x:R},backend:t,attrs:{shape:f}}),t.disposeData(R.dataId)}return k}var jB={kernelName:Xn,backendName:"webgpu",kernelFunc:Wne};function Une(r){let{inputs:e,backend:t,attrs:o}=r,{separator:n,nGramWidths:s,leftPad:a,rightPad:i,padWidth:p,preserveShortSequences:u}=o,{data:c,dataSplits:l}=e,m=t.readSync(c.dataId),d=t.readSync(l.dataId),[f,h]=SM(m,d,n,s,a,i,p,u);return[t.makeTensorInfo([f.length],"string",f),t.makeTensorInfo(l.shape,"int32",h)]}var XB={kernelName:Ds,backendName:"webgpu",kernelFunc:Une};var Gne=xe({opType:X.TAN}),YB={kernelName:Qn,backendName:"webgpu",kernelFunc:Gne};var Hne=xe({opType:X.TANH}),QB={kernelName:Zn,backendName:"webgpu",kernelFunc:Hne};var $x=class{constructor(e){this.variableNames=["x","indices"],this.workgroupSize=[256,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms=`inputSize : i32, firstPass : i32, negativeInf : f32,
2022-11-18 17:13:29 +01:00
dir : i32, inc : i32,`,this.shaderKey="swap"}getUserCode(){return`
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let outC = getCoordsFromIndex(index);
let batch = outC[0];
let elemIdx = outC[1];
// We compare elements pair-wise within a group of size 2 * inc.
// The comparing rule for each group alternates between ascending
// and descending. Within each group, we compare each pair at
// positions i and i+inc. To decide whether an element at position i
// is x0 or x1, we mod it by 2 * inc, if the result is smaller than
// inc, it is in the first half of the group, we denote it as x0,
// otherwise we denote it as x1.
// For example, as shown in the Bitonic top K paper referenced
// above, Figure5(a) shows that element[1] is in the second half of
// the group when group size is 2, but it is in the first half of
// the group when group size is 4.
let isFirstInPair = elemIdx % (2 * uniforms.inc) < uniforms.inc;
var i = 0;
if (isFirstInPair) {
i = elemIdx;
} else {
i = elemIdx - uniforms.inc;
}
var i0 = 0;
if (uniforms.firstPass == 1) {
i0 = i;
} else {
i0 = i32(getIndices(batch, i));
}
var i1 = 0;
if (uniforms.firstPass == 1) {
i1 = i + uniforms.inc;
} else {
i1 = i32(getIndices(batch, i + uniforms.inc));
}
var x0 = f32(0.0);
var x1 = f32(0.0);
if (i0 < uniforms.inputSize) {
x0 = getX(batch, i0);
} else {
x0 = uniforms.negativeInf;
}
if (i1 < uniforms.inputSize) {
x1 = getX(batch, i1);
} else {
x1 = uniforms.negativeInf;
}
let reverse = elemIdx % (2 * uniforms.dir) >= uniforms.dir;
let isGreater = x0 > x1 || (x0 == x1 && i1 > i0);
if (reverse == isGreater) {
// Elements in opposite order of direction
let iTemp = i0;
i0 = i1;
i1 = iTemp;
}
if (isFirstInPair) {
setOutputAtIndex(index, f32(i0));
} else {
setOutputAtIndex(index, f32(i1));
}
}
}
2023-01-06 19:23:06 +01:00
`}},Ex=class{constructor(e){this.variableNames=["x","indices"],this.workgroupSize=[256,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.uniforms="inputSize : i32, firstPass : i32, k : i32,",this.shaderKey="merge"}getUserCode(){return`
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let outC = getCoordsFromIndex(index);
let batch = outC[0];
let elemIdx = outC[1];
// The output size is half of the previous size.
// If the previous sequence is | | | | _ _ _ _ | | | | _ _ _ _
// (k=4), we only need to output the indices at positions |, the
// indices at positions _ can be thrown away, see Figure5(b) After
// Phase 2 (Merge phase) in the Bitonic Top K paper referenced
// above.
// For example, the paper shows we only need to output the orange
// bars. The output sequence should look like this | | | | | | | |.
// Because the sequence is halved, to map the output index back to
// the previous sequence to find the corresponding value, we need
// to double the index. When we double the index, we basically
// interpolate a position, so 2i looks like
// | _ | _ | _ | _ | _ | _ | _. We move the | to the first k
// position of each 2k positions by - elemIdx % k. E.g. for output
// at index 4,5,6,7, we want to get the corresponding element at
// original index 8,9,10,11, for output at index 8,9,10,11,
// we want to get the corresponding element at original index
// 16,17,18,19, so on and so forth.
var i = 0;
if (elemIdx < uniforms.k) {
i = elemIdx;
} else {
i = elemIdx * 2 - elemIdx % uniforms.k;
}
var i0 = 0;
if (uniforms.firstPass == 1) {
i0 = i;
} else {
i0 = i32(getIndices(batch, i));
}
var i1 = 0;
if (uniforms.firstPass == 1) {
i1 = i + uniforms.k;
} else {
i1 = i32(getIndices(batch, i + uniforms.k));
}
let x0 = getX(batch, i0);
var x1 = f32(0.0);
if (i1 < uniforms.inputSize) {
x1 = getX(batch, i1);
} else {
x1 = x0;
}
if (x0 >= x1) {
setOutputAtIndex(index, f32(i0));
} else {
setOutputAtIndex(index, f32(i1));
}
}
}
2023-01-06 19:23:06 +01:00
`}};function Rc(r,e){e!==null&&r.disposeData(e.dataId)}function ZB(r){let e=1;for(;e<r;)e*=2;return e}function Kne(r){let{inputs:e,backend:t,attrs:o}=r,{x:n}=e,{k:s,sorted:a}=o,i=n.shape,p=i[i.length-1];if(t.shouldExecuteOnCPU([n])){let k=t.readSync(n.dataId),[_,E]=vM(k,i,n.dtype,s,a);return[t.makeTensorInfo(_.shape,_.dtype,_.values),t.makeTensorInfo(E.shape,E.dtype,E.values)]}if(s===0)return i[i.length-1]=0,[t.makeTensorInfo(i,n.dtype,[]),t.makeTensorInfo(i,"int32",[])];if(p===1)return[n,fr({attrs:{shape:i,dtype:"int32",value:0},backend:t})];let c=y.sizeFromShape(i)/p,l=me({inputs:{x:n},attrs:{shape:[c,p]},backend:t}),m=ZB(s),d=ZB(p),f=null,h=()=>f===null?[l,l]:[l,f],g=(k,_,E)=>{let A=h(),R=new $x(E),P=[{type:"int32",data:[p]},{type:"int32",data:[f===null?1:0]},{type:"float32",data:[Number.NEGATIVE_INFINITY]},{type:"int32",data:[k]},{type:"int32",data:[_]}],M=f;f=t.runWebGPUProgram(R,A,"int32",P),Rc(t,M)};for(let k=1;k<m;k*=2){let _=k*2;for(let E=k;E>=1;E/=2)g(_,E,[c,d])}for(let k=d;k>m;k/=2){let _=h(),E=new Ex([c,k/2]),R=[{type:"int32",data:[p]},{type:"int32",data:[f===null?1:0]},{type:"int32",data:[m]}],D=f;f=t.runWebGPUProgram(E,_,"int32",R),Rc(t,D);let P=m/2,M=P*2;for(let L=P;L>=1;L/=2)g(M,L,f.shape)}let x=f;f=fs({inputs:{x:f},backend:t,attrs:{begin:0,size:[c,s]}}),Rc(t,x);let b=NI({inputs:{x:l,indices:f},backend:t,attrs:{axis:1,batchDims:1}});Rc(t,l);let C=i.slice(0,-1);C.push(s),x=f,f=me({inputs:{x:f},attrs:{shape:C},backend:t}),Rc(t,x);let w=b;return b=me({inputs:{x:b},attrs:{shape:C},backend:t}),Rc(t,w),[b,f]}var JB={kernelName:Jn,backendName:"webgpu",kernelFunc:Kne};var Ax=class{constructor(e){this.variableNames=["Image","Transforms"],this.uniforms="interpolationModeId : i32, fillModeId : i32, fillValue : f32,",this.workgroupSize=[64,1,1],this.size=!0,this.outputShape=e,this.dispatchLayout=ae(this.outputShape),this.dispatch=Y(this.dispatchLayout,this.outputShape,this.workgroupSize),this.shaderKey="transform"}getUserCode(){return`
2022-11-18 17:13:29 +01:00
fn mapCoord(outCoord : f32, len : f32) -> f32{
var inCoord = outCoord;
if(uniforms.fillModeId == 2) {
if (inCoord < 0.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz2 = 2.0 * len;
if (inCoord < sz2) {
inCoord = sz2 * f32(i32(f32(-inCoord / sz2))) +
inCoord;
}
if (inCoord < -len) {
inCoord = inCoord + sz2;
} else {
inCoord = -inCoord - 1.0;
}
}
} else if (inCoord > len - 1.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz2 = 2.0 * len;
inCoord = inCoord - sz2 * f32(i32(f32(inCoord / sz2)));
if (inCoord >= len) {
inCoord = sz2 - inCoord - 1.0;
}
}
}
return clamp(inCoord, 0.0, len - 1.0);
} else if (uniforms.fillModeId == 3) {
if (inCoord < 0.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz = len - 1.0;
inCoord = inCoord + len * (f32(i32(f32(-inCoord / sz))) + 1.0);
}
} else if (inCoord > len - 1.0) {
if (len <= 1.0) {
inCoord = 0.0;
} else {
let sz = len - 1.0;
inCoord = inCoord - len * f32(i32(f32(inCoord / sz)));
}
}
return clamp(inCoord, 0.0, len - 1.0);
} else if (uniforms.fillModeId == 4) {
return clamp(outCoord, 0.0, len - 1.0);
}
return outCoord;
}
fn readWithFillValue(batch : i32, coordY : i32, coordX : i32,
channel : i32) -> f32 {
var outputValue : f32;
if (0 <= coordY && coordY < uniforms.imageShape[1] && 0 <= coordX && coordX < uniforms.imageShape[2]) {
outputValue = getImage(batch, coordY, coordX, channel);
} else {
outputValue = uniforms.fillValue;
}
return outputValue;
}
2023-01-06 19:23:06 +01:00
${Q("index")} {
2022-11-18 17:13:29 +01:00
if (index < uniforms.size) {
let coords = getCoordsFromIndex(index);
var outputValue : f32;
let batch = coords[0];
let x = coords[2];
let y = coords[1];
let channel = coords[3];
let xf = f32(x);
let yf = f32(y);
let a1 = getTransforms(batch, 0);
let a2 = getTransforms(batch, 1);
let a3 = getTransforms(batch, 2);
let b1 = getTransforms(batch, 3);
let b2 = getTransforms(batch, 4);
let b3 = getTransforms(batch, 5);
let c1 = getTransforms(batch, 6);
let c2 = getTransforms(batch, 7);
let projection = c1 * xf + c2 * yf + 1.0;
if (projection == 0.0) {
outputValue = uniforms.fillValue;
} else {
let inX = (a1 * xf + a2 * yf + a3) / projection;
let inY = (b1 * xf + b2 * yf + b3) / projection;
let mapX = mapCoord(inX, f32(uniforms.imageShape[2]));
let mapY = mapCoord(inY, f32(uniforms.imageShape[1]));
if (uniforms.interpolationModeId == 1) {
let coordY = i32(round(mapY));
let coordX = i32(round(mapX));
outputValue = readWithFillValue(batch, coordY, coordX,
channel);
} else {
let yFloor = floor(mapY);
let xFloor = floor(mapX);
let yCeil = yFloor + 1.0;
let xCeil = xFloor + 1.0;
let valueYFloor = (xCeil - mapX) *
readWithFillValue(batch, i32(yFloor), i32(xFloor), channel) +
(mapX - xFloor) *
readWithFillValue(batch, i32(yFloor), i32(xCeil), channel);
let valueYCeil = (xCeil - mapX) *
readWithFillValue(batch, i32(yCeil), i32(xFloor), channel) +
(mapX - xFloor) *
readWithFillValue(batch, i32(yCeil), i32(xCeil), channel);
outputValue = (yCeil - mapY) * valueYFloor +
(mapY - yFloor) * valueYCeil;
}
}
setOutputAtIndex(index, outputValue);
}
}
2023-01-06 19:23:06 +01:00
`}};function qne(r){let{inputs:e,backend:t,attrs:o}=r,{image:n,transforms:s}=e,{interpolation:a,fillMode:i,fillValue:p,outputShape:u}=o,[c,l,m,d]=n.shape,[f,h]=u!=null?u:[l,m],g=[c,f,h,d],x=new Ax(g),b=a==="nearest"?1:2,C;switch(i){case"constant":C=1;break;case"reflect":C=2;break;case"wrap":C=3;break;case"nearest":C=4;break;default:C=1;break}let w=[{type:"int32",data:[b]},{type:"int32",data:[C]},{type:"float32",data:[p]}];return t.runWebGPUProgram(x,[n,s],"float32",w)}var eV={kernelName:es,backendName:"webgpu",kernelFunc:qne};function jne(r){let{inputs:e,backend:t,attrs:o}=r,{value:n}=e,{axis:s}=o;s<0&&(s+=n.shape.length);let a=n,i=a.shape.length,p=n.shape[s],u=new Array(i-1),c=0;for(let h=0;h<i;h++)h!==s&&(u[c++]=a.shape[h]);let l=[],m=new Array(i).fill(0),d=a.shape.slice();d[s]=1;let f=new Array(p);for(let h=0;h<f.length;h++){m[s]=h;let g=fs({inputs:{x:a},backend:t,attrs:{begin:m,size:d}}),x=me({inputs:{x:g},backend:t,attrs:{shape:u}});f[h]=x,l.push(g)}return l.forEach(h=>t.disposeData(h.dataId)),f}var tV={kernelName:Fs,backendName:"webgpu",kernelFunc:jne};var Xne=[G3,NM,TM,_M,$M,EM,RM,DM,FM,OM,PM,MM,LM,BM,VM,UM,GM,HM,qM,jM,ZM,JM,eL,K3,rL,oL,nL,sL,aL,iL,uL,lL,mL,dL,fL,hL,gL,xL,CL,SL,wL,IL,vL,kL,NL,TL,W3,_L,AL,$L,EL,RL,DL,FL,OL,PL,ML,LL,H3,BL,tL,VL,zL,WL,UL,GL,HL,KL,jL,qL,XL,YL,QL,JL,zM,eB,tB,WM,rB,oB,nB,sB,yL,aB,iB,uB,XM,pB,mB,dB,fB,hB,gB,xB,yB,YM,bB,CB,SB,wB,U3,IB,vB,kB,NB,TB,_B,$B,EB,AB,RB,DB,FB,OB,PB,KM,qB,jB,XB,LB,BB,VB,WB,UB,GB,HB,KB,MB,bL,YB,QB,zB,JB,eV,AM,tV,cB];for(let r of Xne)ka(r);var rV="4.2.0",Yne="4.2.0",Qne="4.2.0",Zne="4.2.0",Jne="4.2.0",ese="0.0.1-alpha.17",tse={tfjs:rV,"tfjs-core":rV,"tfjs-converter":Yne,"tfjs-backend-cpu":Qne,"tfjs-backend-webgl":Zne,"tfjs-backend-wasm":Jne,"tfjs-backend-webgpu":ese};export{ys as Abs,aa as Acos,ia as Acosh,yu as AdadeltaOptimizer,bu as AdagradOptimizer,Cu as AdamOptimizer,Su as AdamaxOptimizer,eo as Add,Po as AddN,Mo as All,Lo as Any,Bo as ArgMax,ei as ArgMin,ua as Asin,pa as Asinh,ca as Atan,ma as Atan2,la as 
Atanh,Vo as AvgPool,dp as AvgPool3D,vm as AvgPool3DGrad,mp as AvgPoolGrad,Ll as BackendWasm,zo as BatchMatMul,bs as BatchToSpaceND,ti as Bincount,fp as BroadcastArgs,ise as BroadcastTo,co as Cast,Wo as Ceil,lo as ClipByValue,ri as Complex,hp as ComplexAbs,Cs as Concat,Uo as Conv2D,oi as Conv2DBackpropFilter,Go as Conv2DBackpropInput,gp as Conv3D,km as Conv3DBackpropFilterV2,xp as Conv3DBackpropInputV2,Ho as Cos,Ko as Cosh,Xo as CropAndResize,qo as Cumprod,jo as Cumsum,Fo as DataStorage,ni as DenseBincount,Yo as DepthToSpace,Qo as DepthwiseConv2dNative,yp as DepthwiseConv2dNativeBackpropFilter,bp as DepthwiseConv2dNativeBackpropInput,si as Diag,ai as Dilation2D,$b as Dilation2DBackpropFilter,_b as Dilation2DBackpropInput,kb as ENV,ii as Einsum,Jo as Elu,Nm as EluGrad,Uc as Environment,en as Equal,da as Erf,tn as Exp,Ss as ExpandDims,fa as Expm1,ui as FFT,ws as Fill,rn as FlipLeftRight,on as Floor,nn as FloorDiv,Zi as FromPixels,sn as FusedBatchNorm,go as FusedConv2D,xo as FusedDepthwiseConv2D,Lu as GPGPUContext,an as GatherNd,Is as GatherV2,fl as GraphModel,un as Greater,pn as GreaterEqual,pi as IFFT,mo as Identity,ci as Imag,ha as IsFinite,ga as IsInf,cn as IsNan,Zr as KernelBackend,mi as LRN,Tm as LRNGrad,ln as LeakyRelu,mn as Less,dn as LessEqual,li as LinSpace,fn as Log,xa as Log1p,use as LogSoftmax,hn as LogicalAnd,gn as LogicalNot,xn as LogicalOr,XI as LogicalXor,pse as LowerBound,Pi as MathBackendCPU,Vi as MathBackendWebGL,yn as Max,Cn as MaxPool,Cp as MaxPool3D,$m as MaxPool3DGrad,_m as MaxPoolGrad,Sp as MaxPoolWithArgmax,bn as Maximum,Sn as Mean,wn as Min,In as Minimum,vn as MirrorPad,ya as Mod,wu as MomentumOptimizer,wp as Multinomial,kn as Multiply,vs as Neg,Tn as NonMaxSuppressionV3,ba as NonMaxSuppressionV4,_n as NonMaxSuppressionV5,Nn as NotEqual,jb as OP_SCOPE_SUFFIX,$n as OneHot,ks as OnesLike,wr as Optimizer,cl as OptimizerConstructors,Ns as Pack,En as PadV2,cse as Pool,An as Pow,Rn as Prelu,Dn as Prod,Iu as RMSPropOptimizer,Ip as RaggedGather,vp as 
RaggedRange,kp as RaggedTensorToTensor,Ts as Range,Lb as Rank,di as Real,Zo as RealDiv,F