Thoughts & Reflections on the article “Mixing movement and machine”

“What do I want from it”

“The tech wasn’t an end in itself, it was only useful to him as a tool for artistic expression.”

“let the concept star and the technology play a supporting role”

We should ask more about how AI could assist/inspire/help people (especially) to create their works. It reminds me of the response from Steve Jobs to an insulting question in an interview. He said that “it’s so hard to fit it (tech innovation) into a cohesive larger vision,” and “you’ve got to start with the customer experience and work backwards to the technology.”

COCO is such an interesting dataset🤔

Untitled

Considering the Model and Data Biography, reflect on the following questions:

Ideas Brainstorm:

Interesting projects (Wekinator):

Untitled

[Train your facial expressions using machine learning](https://vimeo.com/175947130)

Idea formation and test-out:

I want to use the PoseNet output as the input to my own model, making a pose-recognition model that produces different outputs such as colors, sounds, etc.

Untitled

Untitled

At first, I learned how to collect, save, and train on data in p5.js by following the instructions from The Coding Train. [link]

Then, while I was testing the model, I made an interesting discovery: the predicted value depends more on the volume of the training set than on the distance. For example, circles in the lower-left corner, which overlap with training set B, are actually categorized as F. I think this is probably due to the large amount of training data for F above.

Untitled

Now, I want to train a model that can take different poses and produce regression outputs, such as an RGB color or a sound pitch.

Firstly, I need to collect and save the dataset.

// Source
// The Coding Train / Daniel Shiffman
// <https://thecodingtrain.com/Courses/ml5-beginners-guide/7.3-pose-regression.html>

// Webcam capture element created in setup().
let video;
// PoseNet model wrapper returned by ml5.poseNet().
let poseNet;
// Most recent pose (poses[0].pose) reported by PoseNet; undefined until the first detection.
let pose;
// Skeleton (poses[0].skeleton) that belongs to the most recent pose.
let skeleton;
// ml5 neural network that accumulates the training samples.
let brain;
// Collection state: gotPoses() records samples only while this is 'collecting'.
let state = 'waiting';
// [r, g, b] label attached to every sample recorded during a collection run.
let targetColor;
// Sliders (0-255) used to choose the target color before a run.
let rSlider, gSlider, bSlider;

/**
 * Returns a promise that settles after a pause.
 *
 * The original check used the coercing global isNaN(), which let values such
 * as null, '' and Infinity through even though they are not usable delays.
 * Only finite values of type number are accepted now.
 *
 * @param {number} time - pause length in milliseconds, passed to setTimeout.
 * @returns {Promise<void>} resolves once the timer fires; rejects with an
 *   Error when `time` is not a finite number.
 */
function delay(time) {
  return new Promise((resolve, reject) => {
    if (typeof time !== 'number' || !Number.isFinite(time)) {
      reject(new Error('delay requires a valid number.'));
      return;
    }
    setTimeout(resolve, time);
  });
}

/**
 * p5.js key handler.
 *
 *  - 's' saves the samples collected so far via brain.saveData().
 *  - 'd' starts one collection run: the slider values become `targetColor`,
 *    then after a 3 s countdown `state` is 'collecting' for 3 s.
 *
 * A run only starts while `state` is 'waiting'. During the countdown `state`
 * is 'countdown', so a second 'd' press cannot start an overlapping run that
 * would overwrite `targetColor` mid-collection or cut the collection short.
 * `state` always returns to 'waiting', even if a delay rejects.
 *
 * @returns {Promise<void>} settles when the key press has been fully handled.
 */
async function keyPressed() {
  if (key === 's') {
    brain.saveData();
    return;
  }
  if (key !== 'd') {
    return;
  }
  if (state !== 'waiting') {
    // A run is already counting down or collecting; ignore this press.
    return;
  }

  const r = rSlider.value();
  const g = gSlider.value();
  const b = bSlider.value();
  targetColor = [r, g, b];
  console.log(r, g, b);

  // Not 'collecting' yet, so gotPoses() still skips samples during the countdown.
  state = 'countdown';
  try {
    await delay(3000);
    console.log('collecting');
    state = 'collecting';

    await delay(3000);
    console.log('not collecting');
  } finally {
    state = 'waiting';
  }
}

/**
 * p5.js setup hook for the data-collection sketch.
 * Creates the canvas, the three color sliders (initially pure red), the hidden
 * webcam capture, the PoseNet model and the regression network that will hold
 * the collected samples.
 */
function setup() {
  const makeChannelSlider = (initial) => createSlider(0, 255, initial);
  // 34 inputs: presumably x and y for each PoseNet keypoint; 3 outputs: r, g, b.
  // (An earlier revision tried named outputs: ['red','green','blue'].)
  const networkConfig = {
    inputs: 34,
    outputs: 3,
    task: 'regression',
    debug: true,
  };

  createCanvas(640, 480);

  rSlider = makeChannelSlider(255);
  gSlider = makeChannelSlider(0);
  bSlider = makeChannelSlider(0);

  // The capture element is hidden; draw() paints the frames onto the canvas.
  video = createCapture(VIDEO);
  video.hide();

  poseNet = ml5.poseNet(video, modelLoaded);
  poseNet.on('pose', gotPoses);

  brain = ml5.neuralNetwork(networkConfig);
}

/**
 * PoseNet 'pose' event handler (registered in setup()).
 * Remembers the first detection's pose and skeleton and, while `state` is
 * 'collecting', adds the flattened keypoint coordinates to `brain`, labeled
 * with the current `targetColor`.
 *
 * @param {Array} poses - detections; each entry carries `pose` and `skeleton`.
 */
function gotPoses(poses) {
  if (!(poses.length > 0)) {
    return;
  }

  const detection = poses[0];
  pose = detection.pose;
  skeleton = detection.skeleton;

  if (state !== 'collecting') {
    return;
  }

  // Flatten to [x0, y0, x1, y1, ...] in keypoint order.
  const inputs = [];
  for (const keypoint of pose.keypoints) {
    inputs.push(keypoint.position.x, keypoint.position.y);
  }
  brain.addData(inputs, targetColor);
}

/** Callback handed to ml5.poseNet() in setup(); logs a readiness message. */
function modelLoaded() {
  const readyMessage = 'poseNet ready';
  console.log(readyMessage);
}

/**
 * p5.js draw hook for the data-collection sketch.
 * Paints the webcam frame flipped horizontally and, once a pose is known,
 * overlays the skeleton segments and a dot on every keypoint.
 */
function draw() {
  // Shift by the frame width and negate x so the picture is mirrored.
  translate(video.width, 0);
  scale(-1, 1);
  image(video, 0, 0, video.width, video.height);

  if (!pose) {
    return;
  }

  // Each skeleton entry is a pair of connected keypoints.
  for (const segment of skeleton) {
    const from = segment[0];
    const to = segment[1];
    strokeWeight(2);
    stroke(0);
    line(from.position.x, from.position.y, to.position.x, to.position.y);
  }

  for (const keypoint of pose.keypoints) {
    const x = keypoint.position.x;
    const y = keypoint.position.y;
    fill(0);
    stroke(255);
    ellipse(x, y, 16, 16);
  }
}

Then, we load the dataset, train the model on it, and save the trained parameters.

Untitled

// The Coding Train / Daniel Shiffman
// <https://thecodingtrain.com/Courses/ml5-beginners-guide/7.3-pose-regression.html>
// ml5 neural network created in setup(); it is trained on the saved pose/color data.
let brain;

/**
 * p5.js setup hook for the training sketch.
 * No canvas is needed: it only builds the regression network and loads the
 * previously saved samples, continuing in dataReady() once they are loaded.
 */
function setup() {
  noCanvas();

  brain = ml5.neuralNetwork({
    inputs: 34,
    outputs: 3,
    task: 'regression',
    debug: true,
  });

  brain.loadData('color_poses.json', dataReady);
}

/**
 * Callback for brain.loadData(): normalizes the loaded samples and starts a
 * 50-epoch training run that reports to finished().
 */
function dataReady() {
  const trainingOptions = { epochs: 50 };

  brain.normalizeData();
  brain.train(trainingOptions, finished);
}

/** Callback for brain.train(): logs a message and saves the model via brain.save(). */
function finished() {
  const doneMessage = 'model trained';
  console.log(doneMessage);
  brain.save();
}

Finally, we can load the model and run the program:

// Webcam capture element created in setup().
let video;
// PoseNet model wrapper returned by ml5.poseNet().
let poseNet;
// Most recent pose and skeleton reported by PoseNet; undefined until the first detection.
let pose;
let skeleton;

// ml5 neural network into which the trained model files are loaded.
let brain;

// Sliders (0-255) that show the predicted color; draw() reads them for the overlay.
let rSlider, gSlider, bSlider;

/**
 * p5.js setup hook for the deployment sketch.
 * Creates the canvas, three color sliders starting at 0, the hidden webcam
 * capture and the PoseNet model, then builds the regression network and loads
 * the trained model files; brainLoaded() runs once loading completes.
 */
function setup() {
  const makeChannelSlider = () => createSlider(0, 255, 0);

  createCanvas(640, 480);

  rSlider = makeChannelSlider();
  gSlider = makeChannelSlider();
  bSlider = makeChannelSlider();

  // The capture element is hidden; draw() paints the frames onto the canvas.
  video = createCapture(VIDEO);
  video.hide();

  poseNet = ml5.poseNet(video, modelLoaded);
  poseNet.on('pose', gotPoses);

  // Same network shape as the one used for collecting and training.
  brain = ml5.neuralNetwork({
    inputs: 34,
    outputs: 3,
    task: 'regression',
    debug: true,
  });

  brain.load(
    {
      model: 'model/model.json',
      metadata: 'model/model_meta.json',
      weights: 'model/model.weights.bin',
    },
    brainLoaded
  );
}

/** Callback for brain.load(): logs a readiness message and starts the prediction loop. */
function brainLoaded() {
  const readyMessage = 'pose predicting ready!';
  console.log(readyMessage);
  predictColor();
}

/**
 * One step of the prediction loop.
 * With a pose available, its keypoint coordinates are flattened and sent to
 * brain.predict(), which reports to gotResult(). Without a pose yet, the step
 * is retried after 100 ms.
 */
function predictColor() {
  if (!pose) {
    setTimeout(predictColor, 100);
    return;
  }

  // Flatten to [x0, y0, x1, y1, ...] in keypoint order.
  const inputs = [];
  for (const keypoint of pose.keypoints) {
    inputs.push(keypoint.position.x, keypoint.position.y);
  }
  brain.predict(inputs, gotResult);
}

/**
 * Callback for brain.predict(): shows the predicted color on the sliders and
 * requests the next prediction.
 *
 * The original ignored `error`, so a failed prediction crashed with a
 * TypeError when reading `results[0]`. A failure is now logged and the loop
 * stops without touching the sliders.
 *
 * @param {*} error - truthy when the prediction failed.
 * @param {Array<{value: number}>} results - predicted r, g and b, in that order.
 */
function gotResult(error, results) {
  if (error) {
    console.error(error);
    return;
  }

  console.log(results);
  const r = results[0].value;
  const g = results[1].value;
  const b = results[2].value;
  rSlider.value(r);
  gSlider.value(g);
  bSlider.value(b);

  // Keep the loop going: ask for the next prediction right away.
  predictColor();
}

/**
 * PoseNet 'pose' event handler (registered in setup()).
 * Keeps the first detection's pose and skeleton; an empty list leaves the
 * previous values in place.
 *
 * @param {Array} poses - detections; each entry carries `pose` and `skeleton`.
 */
function gotPoses(poses) {
  if (!(poses.length > 0)) {
    return;
  }

  const detection = poses[0];
  pose = detection.pose;
  skeleton = detection.skeleton;
}

/** Callback handed to ml5.poseNet() in setup(); logs a readiness message. */
function modelLoaded() {
  const readyMessage = 'poseNet ready';
  console.log(readyMessage);
}

/**
 * p5.js draw hook for the deployment sketch.
 * Paints the horizontally flipped webcam frame with the pose overlay inside a
 * push()/pop() pair, then covers the canvas with the slider color at alpha 100.
 */
function draw() {
  push();

  // Shift by the frame width and negate x so the picture is mirrored.
  translate(video.width, 0);
  scale(-1, 1);
  image(video, 0, 0, video.width, video.height);

  if (pose) {
    // Each skeleton entry is a pair of connected keypoints.
    for (const segment of skeleton) {
      const from = segment[0];
      const to = segment[1];
      strokeWeight(2);
      stroke(0);
      line(from.position.x, from.position.y, to.position.x, to.position.y);
    }

    for (const keypoint of pose.keypoints) {
      const x = keypoint.position.x;
      const y = keypoint.position.y;
      fill(0);
      stroke(255);
      ellipse(x, y, 16, 16);
    }
  }

  pop();

  // Drawn after pop(), so the overlay is not affected by the mirroring above.
  const red = rSlider.value();
  const green = gSlider.value();
  const blue = bSlider.value();
  background(red, green, blue, 100);
}

Links for the p5 sketches:

Collecting data

Training data

Deploying model

Lessons I learned:

  1. How to train a regression model: collecting, saving, loading, training, & using data.
  2. What a promise is and why we need it.
  3. How the scale() function works.

Future ideas

I’m still working on using the PoseNet model to control sound, in terms of its pitch and range.