DEV Community

Saga
Saga

Posted on

Using InstancedSkinnedMesh in Three.js to render hundreds of 3D characters on screen simultaneously

In Three.js projects, instancing is a well-known way to improve rendering performance. However, the official `InstancedMesh` provided by Three.js only supports static (non-skinned) objects. In our product https://timmerse.com, we need to accommodate dozens or even hundreds of people participating in the same event. During testing, we found that once the number of Avatars exceeds twenty, the page starts dropping frames and can no longer sustain a smooth 60 frames per second.

Image description

We have made several optimization attempts for 3D rendering:

  1. Reducing draw calls

We performed model simplification, reducing the number of triangles in the scene as much as possible without affecting the overall rendering effect. There's a general method for this, and since our scene is already low-poly, many elements still maintain good rendering quality after simplification. This significantly reduced the number of triangles in the scene. Additionally, some advanced rendering effects, such as CSM (Cascaded Shadow Maps), are only enabled on high-performance machines. Below is an implementation of GPU performance detection capability:

/**
 * Classifies the current GPU into a coarse performance tier
 * ('high' | 'middle' | 'low') by pattern-matching the GPU model string.
 *
 * The tier defaults to 'high' and is only changed when one of the
 * vendor-specific rules below matches (or when no GPU string is available
 * on the desktop path, which is rated 'low').
 *
 * Relies on `getGPUModel`, `isMobileOrCloud` and `iOS`, which are defined
 * outside this class.
 */
export class GpuDetector {
  /** GPU model string as returned by `getGPUModel()`. */
  gpu: string;
  /** Detected tier; remains 'high' when no rule matches. */
  _level: 'high' | 'low' | 'middle' = 'high';

  constructor() {
    this.gpu = getGPUModel();
    // Run exactly one detection path. Previously detectPC() was also called
    // unconditionally first, so desktop heuristics leaked into the mobile
    // result whenever the mobile rules did not match.
    if (isMobileOrCloud) {
      this.detectMobile();
    } else {
      this.detectPC();
    }
    console.log('GPU: ', this.gpu, ';level:', this._level);
  }

  /** The detected performance tier. */
  get level(): 'high' | 'low' | 'middle' {
    return this._level;
  }

  /**
   * Mobile path: on iOS the tier is first derived from screen height and
   * device pixel ratio; the vendor checks then run and override that value
   * when the GPU string names an Adreno, Mali or PowerVR part.
   */
  detectMobile() {
    if (iOS) {
      this._level = window.screen.height >= 812 && window.devicePixelRatio >= 2 ? 'high' : 'low';
    }
    if (/adreno/i.test(this.gpu)) {
      this._level = this.adrenoGPU();
    } else if (/mali/i.test(this.gpu)) {
      this._level = this.maliGPU();
    } else if (/powervr/i.test(this.gpu)) {
      this._level = this.powerVRGPU();
    }
  }

  /**
   * Desktop path: a missing GPU string is rated 'low'; otherwise the first
   * matching vendor rule decides. Unknown vendors keep the current tier.
   */
  detectPC() {
    if (!this.gpu) {
      this._level = 'low';
      // Nothing to match against; no vendor rule can apply.
      return;
    }
    if (/apple m/i.test(this.gpu)) {
      this._level = 'high';
    } else if (/apple/i.test(this.gpu)) {
      this._level = 'middle';
    } else if (/nvidia/i.test(this.gpu)) {
      this._level = this.nvidiaGPU();
    } else if (/amd/i.test(this.gpu)) {
      this._level = this.amdGPU();
    } else if (/intel/i.test(this.gpu)) {
      this._level = this.intelGPU();
    }
  }

  /** PowerVR: only strings containing "GT8" are rated 'high'. */
  powerVRGPU(): 'high' | 'low' | 'middle' {
    return /GT8/i.test(this.gpu) ? 'high' : 'low';
  }

  /**
   * Adreno: rates by model number (> 640 high, >= 570 middle, else low).
   * The number is taken from the first digit run after "adreno" when the
   * string has text on both sides of it, otherwise from the last
   * space-separated token. An unparsable number yields 'low'.
   */
  adrenoGPU(): 'high' | 'low' | 'middle' {
    const match = /^.+adreno\D+(\d+).+$/i.exec(this.gpu);
    let modelText: string;
    if (match !== null) {
      modelText = match[1];
    } else {
      const tokens = this.gpu.split(' ');
      modelText = tokens[tokens.length - 1];
    }
    const model = parseInt(modelText, 10);
    return model > 640 ? 'high' : model >= 570 ? 'middle' : 'low';
  }

  /**
   * Mali: only Mali-G parts are rated above 'low'
   * (> 77 high; 76, 52 and 31 middle).
   */
  maliGPU(): 'high' | 'low' | 'middle' {
    if (/mali-g/i.test(this.gpu)) {
      // Split case-insensitively so the parse agrees with the test above;
      // a literal 'Mali-G' split left lower-case strings unparsed (NaN -> 'low').
      const parts = this.gpu.split(/mali-g/i);
      const model = parseInt(parts[parts.length - 1], 10);
      return model > 77 ? 'high' : model === 76 || model === 31 || model === 52 ? 'middle' : 'low';
    }
    return 'low';
  }

  /** NVIDIA: RTX/Titan high, GTX middle, everything else low. */
  nvidiaGPU(): 'high' | 'low' | 'middle' {
    return /(rtx|titan)/i.test(this.gpu) ? 'high' : /gtx/i.test(this.gpu) ? 'middle' : 'low';
  }

  /**
   * AMD: no AMD GPU is rated 'high' here. (The original kept a
   * commented-out variant that mapped each case one tier higher.)
   */
  amdGPU(): 'high' | 'low' | 'middle' {
    if (/(pro|radeon vii)/i.test(this.gpu)) return 'middle';
    if (/(rx)/i.test(this.gpu)) {
      // Case-insensitive split keeps the parse consistent with the test above.
      const parts = this.gpu.split(/rx /i);
      return parseInt(parts[parts.length - 1], 10) > 560 ? 'middle' : 'low';
    }
    return 'middle';
  }

  /**
   * Intel: Iris parts are 'middle' when reported through the
   * "OpenGL Engine" driver string or when the number after "Graphics " is
   * >= 650; HD parts are 'middle' above 7000. Unparsable numbers yield 'low'.
   */
  intelGPU(): 'high' | 'low' | 'middle' {
    if (/iris/i.test(this.gpu)) {
      if (/opengl engine/i.test(this.gpu)) return 'middle';
      const irisParts = this.gpu.split('Graphics ');
      return parseInt(irisParts[1], 10) >= 650 ? 'middle' : 'low';
    }
    if (/HD/i.test(this.gpu)) {
      const hdParts = this.gpu.split('HD ');
      return parseInt(hdParts[1], 10) > 7e3 ? 'middle' : 'low';
    }
    return /apple/i.test(this.gpu) ? 'middle' : 'low';
  }
}
Enter fullscreen mode Exit fullscreen mode
  2. Reduce texture size

Textures come from many sources: skybox textures, textures for the various Avatar components, and image and video assets. There is also a less obvious source: the textures embedded in user-uploaded models. We can use https://gltf.report/ to analyze how much GPU memory the textures in a model file occupy. I've written an analysis tool that scans the glb/gltf files in a directory, estimates the texture GPU memory of each one, and helps locate problematic models.

const fs = require('fs');
const path = require('path');
const { Document, NodeIO } = require('@gltf-transform/core');
const { execSync } = require('child_process');
const {
  KHRDracoMeshCompression,
  KHRMaterialsEmissiveStrength,
  KHRMaterialsSpecular,
  KHRMaterialsIOR,
  KHRMaterialsClearcoat,
  KHRMaterialsIridescence,
  KHRMeshQuantization,
  EXTMeshoptCompression,
} = require('@gltf-transform/extensions');
const draco3d = require('draco3dgltf');
const meshopt = require('meshoptimizer');

/**
 * Walks a directory tree, estimates the uncompressed texture GPU memory of
 * every .glb/.gltf file found, prints the list sorted from largest to
 * smallest, and writes it to ./gpu-memory.txt.
 */
(async () => {
  // One entry per successfully read model: { megabytes, filePath }.
  const results = [];

  /**
   * Debug helper (not called by default): prints the `gltf-transform inspect`
   * report for one file.
   * NOTE(review): filePath is interpolated into a shell command; only use it
   * on trusted paths, or switch to execFileSync with an argument array.
   */
  function inspectFile(filePath) {
    const output = execSync(`gltf-transform inspect "${filePath}"`, { encoding: 'utf-8' });
    console.log(output);
  }

  /**
   * Builds the NodeIO reader once so the Draco decoder module is not
   * re-created for every file.
   */
  async function createIO() {
    // The meshopt decoder must finish initialising before it is used.
    // Awaiting the decoder object itself (as before) does not wait for that;
    // its `ready` promise does.
    await meshopt.MeshoptDecoder.ready;
    return new NodeIO()
      .registerExtensions([
        KHRDracoMeshCompression,
        KHRMaterialsEmissiveStrength,
        KHRMaterialsSpecular,
        KHRMaterialsIOR,
        KHRMaterialsClearcoat,
        KHRMaterialsIridescence,
        KHRMeshQuantization,
        EXTMeshoptCompression,
      ])
      .registerDependencies({
        'draco3d.decoder': await draco3d.createDecoderModule(),
        'meshopt.decoder': meshopt.MeshoptDecoder,
      });
  }

  /**
   * Reads one model and records its estimated texture memory.
   * Files that fail to load are logged and skipped.
   */
  async function processFile(io, filePath) {
    let document;
    try {
      document = await io.read(filePath);
    } catch (error) {
      console.error(`Error reading ${filePath}:`, error);
      return;
    }

    let fileGPUMemory = 0;
    for (const texture of document.getRoot().listTextures()) {
      const image = texture.getImage();
      const dimensions = texture.getSize();
      if (image && dimensions) {
        // Assuming 4 bytes per pixel (RGBA)
        fileGPUMemory += dimensions[0] * dimensions[1] * 4;
      }
    }
    results.push({ megabytes: fileGPUMemory / 1024 / 1024, filePath });
  }

  /** Recursively visits directoryPath and processes every .glb/.gltf file. */
  async function traverseDirectory(io, directoryPath) {
    for (const file of fs.readdirSync(directoryPath)) {
      const fullPath = path.join(directoryPath, file);
      if (fs.statSync(fullPath).isDirectory()) {
        await traverseDirectory(io, fullPath);
      } else if (fullPath.endsWith('.glb') || fullPath.endsWith('.gltf')) {
        await processFile(io, fullPath);
      }
    }
  }

  const io = await createIO();

  // await traverseDirectory(io, path.resolve(__dirname, 'models'));
  await traverseDirectory(io, path.resolve(__dirname, '../../../avatar'));

  // Sort on the numeric value directly, largest first, then format.
  results.sort((a, b) => b.megabytes - a.megabytes);
  const resList = results.map(
    ({ megabytes, filePath }) => `${megabytes} MB texture GPU memory: ${filePath}`
  );

  console.log(resList);
  fs.writeFileSync('./gpu-memory.txt', resList.join('\n'), 'utf-8');
})().catch(error => {
  // Surface unexpected failures instead of leaving a floating rejection.
  console.error(error);
  process.exitCode = 1;
});

Enter fullscreen mode Exit fullscreen mode

3. Instancing of Skinned Meshes

After completing the two optimizations mentioned above, large scenes can now run smoothly on both PC and mobile devices when there are relatively few Avatars. The current bottleneck is now with the Avatars themselves. Our Avatars have more than ten components: hairstyles, facial features, clothing, etc. Since the characters support movement and can perform many skeletal animations, each Avatar's skinned mesh represents a significant performance cost. With 20 people, there are over 200 skinned meshes in the scene, which already creates a performance bottleneck in rendering.

InstancedSkinnedMesh has been discussed in the three.js repository (https://github.com/mrdoob/three.js/pull/22667). Here is the core code extracted from that discussion:

import * as THREE from 'three'

// Module-level scratch objects shared by every InstancedSkinnedMesh so the
// per-instance loops below do not allocate.

// Receives one instance's local matrix during raycast().
const _instanceLocalMatrix = /*@__PURE__*/ new THREE.Matrix4()
// Mesh world matrix multiplied by the instance's local matrix, used in raycast().
const _instanceWorldMatrix = /*@__PURE__*/ new THREE.Matrix4()

// Holds bone.matrixWorld * boneInverse while bone matrices are flattened.
const _offsetMatrix = /*@__PURE__*/ new THREE.Matrix4()
// Stand-in world matrix for a missing (falsy) bone entry.
const _identityMatrix = /*@__PURE__*/ new THREE.Matrix4()

// Collects the hits of a single instance in raycast(); emptied after each instance.
const _instanceIntersects = []

// Guards the one-time global ShaderChunk patch done in the constructor.
let patchedChunks = false

/**
 * A SkinnedMesh that is drawn `count` times in one call, with a separate
 * transform (instanceMatrix), optional color (instanceColor) and bone pose
 * (instanceBones) per instance.
 *
 * Bone matrices of all instances live in one Float32Array laid out as
 * `count` rows of `bones.length` 4x4 matrices; the patched vertex shader
 * reads row `gl_InstanceID` of the bone texture built on that array.
 *
 * Side effect: the first construction patches THREE.ShaderChunk globally
 * (points_vert and skinning_pars_vertex).
 */
export class InstancedSkinnedMesh extends THREE.SkinnedMesh {
  /**
   * @param geometry Geometry passed to SkinnedMesh.
   * @param material Material passed to SkinnedMesh.
   * @param {number} [count=1] Number of instances.
   */
  constructor(geometry, material, count = 1) {
    super(geometry, material)

    this.instanceMatrix = new THREE.InstancedBufferAttribute(
      new Float32Array(count * 16),
      16
    )
    this.instanceColor = null
    // Allocated lazily (first setBonesAt or first bone-texture build).
    this.instanceBones = null

    this.count = count

    this.frustumCulled = false

    // Plain SkinnedMesh used by raycast() to test one instance at a time.
    this._mesh = null
    this.isInstancedMesh = true

    const bind = this.bind.bind(this)
    this.bind = function (skeleton, bindMatrix) {
      bind(skeleton, bindMatrix)

      // Replace the skeleton's update so it can write into the shared
      // per-instance buffer at a given instance row. Called without
      // arguments it writes into skeleton.boneMatrices at row 0.
      this.skeleton.update = (instanceBones, id) => {
        const bones = this.skeleton.bones
        const boneInverses = this.skeleton.boneInverses
        const boneMatrices = instanceBones || this.skeleton.boneMatrices
        const boneTexture = this.skeleton.boneTexture
        const instanceId = id || 0

        // flatten bone matrices to array
        for (let i = 0, il = bones.length; i < il; i++) {
          // compute the offset between the current and the original transform
          const matrix = bones[i] ? bones[i].matrixWorld : _identityMatrix

          _offsetMatrix.multiplyMatrices(matrix, boneInverses[i])
          _offsetMatrix.toArray(
            boneMatrices,
            16 * (i + instanceId * bones.length)
          )
        }

        if (boneTexture !== null) {
          boneTexture.needsUpdate = true
        }
      }

      this.skeleton.computeBoneTexture = this.skeleton.computeInstancedBoneTexture = () => {
        const boneCount = this.skeleton.bones.length

        // The texture must wrap the same array setBonesAt() writes to. If the
        // texture is built before the first setBonesAt() call, allocate the
        // array here; otherwise the texture would be created without data and
        // never see later updates.
        if (this.instanceBones === null) {
          this.instanceBones = new Float32Array(boneCount * 16 * this.count)
        }

        // Release a previously built texture before replacing it.
        if (this.skeleton.boneTexture) {
          this.skeleton.boneTexture.dispose()
        }

        // One row per instance, four RGBA texels (one mat4) per bone.
        this.skeleton.boneTexture = new THREE.DataTexture(
          this.instanceBones,
          boneCount * 4,
          this.count,
          THREE.RGBAFormat,
          THREE.FloatType
        )
        this.skeleton.boneTexture.needsUpdate = true
      }
    }

    // Patch three.js skinning shader chunks for points and instanced bones
    if (!patchedChunks) {
      patchedChunks = true

      THREE.ShaderChunk.points_vert = THREE.ShaderChunk.points_vert.replace(
        '#include <clipping_planes_pars_vertex>',
        '#include <clipping_planes_pars_vertex>\n#include <skinning_pars_vertex>'
      )
      THREE.ShaderChunk.points_vert = THREE.ShaderChunk.points_vert.replace(
        '#include <morphtarget_vertex>',
        '#include <skinbase_vertex>\n#include <morphtarget_vertex>\n#include <skinning_vertex>'
      )

      // Update PointsMaterial
      THREE.ShaderLib.points.vertexShader = THREE.ShaderChunk.points_vert

      THREE.ShaderChunk.skinning_pars_vertex = /* glsl */ `
        #ifdef USE_SKINNING

          uniform mat4 bindMatrix;
          uniform mat4 bindMatrixInverse;

          uniform highp sampler2D boneTexture;
          uniform int boneTextureSize;

          mat4 getBoneMatrix( const in float i ) {

          #ifdef USE_INSTANCING

              int j = 4 * int(i);
              vec4 v1 = texelFetch(boneTexture, ivec2( j, gl_InstanceID ), 0);
              vec4 v2 = texelFetch(boneTexture, ivec2( j + 1, gl_InstanceID ), 0);
              vec4 v3 = texelFetch(boneTexture, ivec2( j + 2, gl_InstanceID ), 0);
              vec4 v4 = texelFetch(boneTexture, ivec2( j + 3, gl_InstanceID ), 0);

          #else

            float j = i * 4.0;
            float x = mod( j, float( boneTextureSize ) );
            float y = floor( j / float( boneTextureSize ) );

            float dx = 1.0 / float( boneTextureSize );
            float dy = 1.0 / float( boneTextureSize );

            y = dy * ( y + 0.5 );

            vec4 v1 = texture2D( boneTexture, vec2( dx * ( x + 0.5 ), y ) );
            vec4 v2 = texture2D( boneTexture, vec2( dx * ( x + 1.5 ), y ) );
            vec4 v3 = texture2D( boneTexture, vec2( dx * ( x + 2.5 ), y ) );
            vec4 v4 = texture2D( boneTexture, vec2( dx * ( x + 3.5 ), y ) );

          #endif

            mat4 bone = mat4( v1, v2, v3, v4 );

            return bone;

          }

        #endif
      `
    }
  }

  /**
   * Copies `source` into this mesh; when `source` is instanced, its instance
   * matrices, instance colors (if any) and count are copied as well.
   * @returns {this}
   */
  copy(source) {
    super.copy(source)

    if (source.isInstancedMesh) {
      this.instanceMatrix.copy(source.instanceMatrix)

      if (source.instanceColor !== null)
        this.instanceColor = source.instanceColor.clone()

      this.count = source.count
    }

    return this
  }

  /** Reads the color of instance `index` into `color`. Requires instanceColor to be set. */
  getColorAt(index, color) {
    color.fromArray(this.instanceColor.array, index * 3)
  }

  /** Reads the local matrix of instance `index` into `matrix`. */
  getMatrixAt(index, matrix) {
    matrix.fromArray(this.instanceMatrix.array, index * 16)
  }

  /**
   * Raycasts against every instance by re-using one plain SkinnedMesh whose
   * world matrix is set to each instance's world matrix in turn. Hits are
   * tagged with `instanceId` and report this mesh as `object`.
   */
  raycast(raycaster, intersects) {
    const matrixWorld = this.matrixWorld
    const raycastTimes = this.count

    if (this._mesh === null) {
      this._mesh = new THREE.SkinnedMesh(this.geometry, this.material)
      this._mesh.copy(this)
    }

    const _mesh = this._mesh

    if (_mesh.material === undefined) return

    for (let instanceId = 0; instanceId < raycastTimes; instanceId++) {
      // calculate the world matrix for each instance

      this.getMatrixAt(instanceId, _instanceLocalMatrix)

      _instanceWorldMatrix.multiplyMatrices(matrixWorld, _instanceLocalMatrix)

      // the mesh represents this single instance

      _mesh.matrixWorld = _instanceWorldMatrix

      _mesh.raycast(raycaster, _instanceIntersects)

      // process the result of raycast

      for (let i = 0, l = _instanceIntersects.length; i < l; i++) {
        const intersect = _instanceIntersects[i]
        intersect.instanceId = instanceId
        intersect.object = this
        intersects.push(intersect)
      }

      _instanceIntersects.length = 0
    }
  }

  /** Writes `color` for instance `index`, creating instanceColor on first use. */
  setColorAt(index, color) {
    if (this.instanceColor === null) {
      this.instanceColor = new THREE.InstancedBufferAttribute(
        new Float32Array(this.instanceMatrix.count * 3),
        3
      )
    }

    color.toArray(this.instanceColor.array, index * 3)
  }

  /** Writes the local matrix of instance `index`. */
  setMatrixAt(index, matrix) {
    matrix.toArray(this.instanceMatrix.array, index * 16)
  }

  /**
   * Captures the current pose of `skeleton` (default: this mesh's bound
   * skeleton) as the bone matrices of instance `index`.
   *
   * The matrices are flattened here rather than through `skeleton.update`,
   * because only this mesh's bound skeleton carries the patched `update`
   * that accepts a target buffer; delegating to any other skeleton's
   * `update` left instanceBones untouched.
   *
   * @param {number} index Instance row to write.
   * @param [skeleton] Skeleton to read bones from; assumed to have the same
   *   number of bones as the bound skeleton.
   */
  setBonesAt(index, skeleton) {
    const source = skeleton || this.skeleton
    const bones = source.bones
    const boneInverses = source.boneInverses
    const boneCount = bones.length
    const instanceId = index || 0

    if (this.instanceBones === null) {
      this.instanceBones = new Float32Array(boneCount * 16 * this.count)
    }

    for (let i = 0; i < boneCount; i++) {
      // offset between the bone's current and original (bind) transform
      const matrix = bones[i] ? bones[i].matrixWorld : _identityMatrix

      _offsetMatrix.multiplyMatrices(matrix, boneInverses[i])
      _offsetMatrix.toArray(
        this.instanceBones,
        16 * (i + instanceId * boneCount)
      )
    }

    // The texture that wraps instanceBones is stored on the bound skeleton.
    const boneTexture = this.skeleton ? this.skeleton.boneTexture : null
    if (boneTexture) {
      boneTexture.needsUpdate = true
    }
  }

  /** Intentionally a no-op in this class. */
  updateMorphTargets() {}

  /** Dispatches a 'dispose' event on this mesh. */
  dispose() {
    this.dispatchEvent({ type: 'dispose' })
  }
}

Enter fullscreen mode Exit fullscreen mode

4. Business code optimization

After completing the above three major performance optimizations, what remains are the code snippets written by business colleagues that unintentionally affect the rendering process. This part is relatively easy to identify using Chrome's DevTools, so I won't elaborate further.

Optimization Results

After the series of optimizations mentioned above, we achieved having over a hundred people in a unified space while maintaining 60 frames per second performance on https://timmerse.com.

Image description

You can enter our Demo page for further experience:

Image description

Top comments (0)