What's new

Scripting Parsing layer ID from PSD file using Extendscript


ianpinder

New Member
Messages
4
Likes
0
This may be something that is outside the scope of scripting, but I really need to gather the unique layer IDs from a PSD file, without opening it in photoshop. I have seen information on C++ parsing libraries, but my coding is quite weak.

Ideally I would be able to gather each of the layer names and its respective unique layer ID, in order for the info to be used within a script for After Effects, written using Extendscript (.jsx).

Even the most basic of pointers would be appreciated, and thanks for your time :)
- Ian
 

Paul MR

The Script Master
Messages
374
Likes
203
Hi Ian, it's a long time since I tried anything like this. The structure of the PSD will have changed since I wrote this script but it might give you a start.
It should give you the layer names. You will have to find where the IDs are kept and add that to the script. As it reads the file directly, it should work in any app that runs .jsx scripts (just remove the UI section etc.).
Code:
#target photoshop;
/**
 * Asks the user for a PSD file, scans its raw bytes for "8BIMluni"
 * (Unicode layer name) blocks and lists the decoded names in a dialog.
 *
 * The file is read as binary text, so the document is never opened in
 * Photoshop. Names are listed in reverse file order, and any name containing
 * "</Layer group" (the entry that marks the end of a layer group) is left out.
 *
 * ES3 syntax is used on purpose so the script keeps running in ExtendScript.
 */
function main() {
  // Read one byte at offset (NaN when the offset is outside the data).
  function readByte(str, ofs) {
    return str.charCodeAt(ofs);
  }
  // Read two bytes, big-endian. Multiplication instead of << keeps NaN
  // visible for out-of-range reads.
  function readInt16(str, ofs) {
    return readByte(str, ofs) * 0x100 + readByte(str, ofs + 1);
  }
  // Read four bytes, big-endian, as an unsigned value (<< 16 would turn
  // values of 0x80000000 and above into negative numbers).
  function readWord(str, ofs) {
    return readInt16(str, ofs) * 0x10000 + readInt16(str, ofs + 2);
  }
  // Decode one two-byte character at offset.
  function readUnicodeChar(str, ofs) {
    return String.fromCharCode(readInt16(str, ofs));
  }
  // Decode a string stored as a 4-byte character count followed by
  // two-byte characters. The count is clamped to the data that is really
  // there, so a truncated file or a chance signature match cannot make
  // the loop run far past the end of the data.
  function readUnicodeString(str, ofs) {
    var count = readWord(str, ofs);
    var start = ofs + 4;
    var available = Math.floor((str.length - start) / 2);
    if (isNaN(count) || available < 0) {
      count = 0;
    } else if (count > available) {
      count = available;
    }
    var chars = [];
    for (var c = 0; c < count; c++) {
      chars.push(readUnicodeChar(str, start + 2 * c));
    }
    return chars.join('');
  }
  // Return the start index of every non-overlapping occurrence of signature.
  function findAll(str, signature) {
    var found = [];
    var at = str.indexOf(signature);
    while (at !== -1) {
      found.push(at);
      at = str.indexOf(signature, at + signature.length);
    }
    return found;
  }

  var file = File.openDialog("Please select PSD file.","PSD File:*.psd");
  // openDialog returns null when the user cancels the picker.
  if (!file || !file.exists) return;
  if (!file.open("r")) {
    alert("Unable to open " + decodeURI(file.name));
    return;
  }
  file.encoding = 'BINARY';
  var dat = file.read();
  file.close();

  var NAME_SIGNATURE = '8BIMluni';
  var hits = findAll(dat, NAME_SIGNATURE);
  var names = [];

  for (var i = 0; i < hits.length; i++) {
    // After the 8-byte signature, 4 bytes are skipped (the block's length
    // field per the PSD spec); the character count and characters follow.
    var name = readUnicodeString(dat, hits[i] + NAME_SIGNATURE.length + 4);
    if (!name.match(/<\/Layer group/)) {
      names.push(name); // Layer name found and stored
    }
  }

  var w = new Window('dialog', decodeURI(file.name));
  w.lb1 = w.add('listbox', undefined, 'Names');
  w.lb1.preferredSize = [500, 200];
  w.bu1 = w.add('button', undefined, 'Exit');
  w.bu1.preferredSize = [500, 35];
  w.bu1.onClick = function () {
    w.close(0);
  };

  // Show the names in the reverse of the order they were found in the file.
  names.reverse();
  for (var k = 0; k < names.length; k++) {
    w.lb1.add('item', names[k]);
  }

  w.show();
}
 
main();
 

ianpinder

New Member
Messages
4
Likes
0
Hey Paul,
Thank you SO much for this. You are a legend. I'll take a look at this later, but it looks like exactly the kind of start that I needed.

Thanks again Paul :)
 

Paul MR

The Script Master
Messages
374
Likes
203
Hi Ian, just had a quick play, you will need to tidy the code up but this seems to get the names and IDs of all layers.
Best of luck!
Code:
#target photoshop;
/**
 * Asks the user for a PSD file, scans its raw bytes for "8BIMluni"
 * (Unicode layer name) and "8BIMlyid" (layer ID) blocks and lists each
 * name together with an ID in a dialog.
 *
 * Names and IDs are collected independently and paired purely by position,
 * so the pairing is only right when every name block has a matching ID
 * block in the same order. Both lists are reversed before display.
 *
 * ES3 syntax is used on purpose so the script keeps running in ExtendScript.
 */
function main() {
  // Read one byte at offset (NaN when the offset is outside the data).
  function readByte(str, ofs) {
    return str.charCodeAt(ofs);
  }
  // Read two bytes, big-endian. Multiplication instead of << keeps NaN
  // visible for out-of-range reads.
  function readInt16(str, ofs) {
    return readByte(str, ofs) * 0x100 + readByte(str, ofs + 1);
  }
  // Read four bytes, big-endian, as an unsigned value (<< 16 would turn
  // values of 0x80000000 and above into negative numbers).
  function readWord(str, ofs) {
    return readInt16(str, ofs) * 0x10000 + readInt16(str, ofs + 2);
  }
  // Decode one two-byte character at offset.
  function readUnicodeChar(str, ofs) {
    return String.fromCharCode(readInt16(str, ofs));
  }
  // Decode a string stored as a 4-byte character count followed by
  // two-byte characters. The count is clamped to the data that is really
  // there, so a truncated file or a chance signature match cannot make
  // the loop run far past the end of the data.
  function readUnicodeString(str, ofs) {
    var count = readWord(str, ofs);
    var start = ofs + 4;
    var available = Math.floor((str.length - start) / 2);
    if (isNaN(count) || available < 0) {
      count = 0;
    } else if (count > available) {
      count = available;
    }
    var chars = [];
    for (var c = 0; c < count; c++) {
      chars.push(readUnicodeChar(str, start + 2 * c));
    }
    return chars.join('');
  }
  // Return the start index of every non-overlapping occurrence of signature.
  function findAll(str, signature) {
    var found = [];
    var at = str.indexOf(signature);
    while (at !== -1) {
      found.push(at);
      at = str.indexOf(signature, at + signature.length);
    }
    return found;
  }

  var file = File.openDialog("Please select PSD file.","PSD File:*.psd");
  // openDialog returns null when the user cancels the picker.
  if (!file || !file.exists) return;
  if (!file.open("r")) {
    alert("Unable to open " + decodeURI(file.name));
    return;
  }
  file.encoding = 'BINARY';
  var dat = file.read();
  file.close();

  var NAME_SIGNATURE = '8BIMluni';
  var ID_SIGNATURE = '8BIMlyid';
  var names = [];
  var ids = [];

  var nameHits = findAll(dat, NAME_SIGNATURE);
  for (var i = 0; i < nameHits.length; i++) {
    // After the 8-byte signature, 4 bytes are skipped (the block's length
    // field per the PSD spec); the character count and characters follow.
    names.push(readUnicodeString(dat, nameHits[i] + NAME_SIGNATURE.length + 4));
  }

  var idHits = findAll(dat, ID_SIGNATURE);
  for (var n = 0; n < idHits.length; n++) {
    // 8 bytes of signature + 4 skipped bytes, then the 4-byte ID itself.
    ids.push(readWord(dat, idHits[n] + 12));
  }

  var w = new Window('dialog', decodeURI(file.name));
  w.lb1 = w.add('listbox', undefined, 'Names');
  w.lb1.preferredSize = [500, 200];
  w.bu1 = w.add('button', undefined, 'Exit');
  w.bu1.preferredSize = [500, 35];
  w.bu1.onClick = function () {
    w.close(0);
  };

  // Show the entries in the reverse of the order they were found in the file.
  names.reverse();
  ids.reverse();
  for (var k = 0; k < names.length; k++) {
    w.lb1.add('item', names[k] + " ID = " + ids[k]);
  }

  w.show();
}
 
main();
 

ianpinder

New Member
Messages
4
Likes
0
Hi again Paul,
Thanks so much for taking the time to look at this. As you said, this does pretty much exactly what I need for this. I have just removed the UI stuff, renamed some variables while trying to make sure I understood the logic, and put the results into array pairs for use in another script.

I needed to make a couple of amendments, mainly to get the script to skip over layer groups entirely (including the start and end of the groups) and it really highlighted my ignorance about the way in which you are reading and converting the binary data. I also found a PSD spec document (here) with the hope that I could work out how to access the relevant data to filter out specific layer types, but again my lack of knowledge on how the binary data is accessed and converted into readable strings really hindered me. It also seems that because the data is not a string as such, that my usual method of looking through the debugger to scan the data that needs parsing is not very helpful.

Can I ask your methodology when finding how to access the specific items, and if you had the time, maybe the bare-bones how the four read functions interact?

I have posted the slightly amended code below (though the functionality is essentially the same as yours).

No worries if you don't have the time to get into the details of this, I totally understand. Thanks again, you've already been a massive help in getting this working. Hopefully I can get to a point where I understand things a little deeper and don't have to bother kind people like yourself. :)

Kind regards,
- Ian


JavaScript:
/**
 * Asks the user for a PSD file, scans its raw bytes for "8BIMluni"
 * (Unicode layer name) and "8BIMlyid" (layer ID) blocks, builds an array
 * of [name, ID] pairs and shows it with alert().
 *
 * Names and IDs are collected independently and paired purely by position,
 * so the pairing is only right when every name block has a matching ID
 * block in the same order. Both lists are reversed before pairing.
 *
 * ES3 syntax is used on purpose so the script keeps running in ExtendScript.
 */
function main() {
  // Read one byte at offset (NaN when the offset is outside the data).
  function readByte(str, ofs) {
    return str.charCodeAt(ofs);
  }
  // Read two bytes, big-endian. Multiplication instead of << keeps NaN
  // visible for out-of-range reads.
  function readInt16(str, ofs) {
    return readByte(str, ofs) * 0x100 + readByte(str, ofs + 1);
  }
  // Read four bytes, big-endian, as an unsigned value (<< 16 would turn
  // values of 0x80000000 and above into negative numbers).
  function readWord(str, ofs) {
    return readInt16(str, ofs) * 0x10000 + readInt16(str, ofs + 2);
  }
  // Decode one two-byte character at offset.
  function readUnicodeChar(str, ofs) {
    return String.fromCharCode(readInt16(str, ofs));
  }
  // Decode a string stored as a 4-byte character count followed by
  // two-byte characters. The count is clamped to the data that is really
  // there, so a truncated file or a chance signature match cannot make
  // the loop run far past the end of the data.
  function readUnicodeString(str, ofs) {
    var count = readWord(str, ofs);
    var start = ofs + 4;
    var available = Math.floor((str.length - start) / 2);
    if (isNaN(count) || available < 0) {
      count = 0;
    } else if (count > available) {
      count = available;
    }
    var chars = [];
    for (var c = 0; c < count; c++) {
      chars.push(readUnicodeChar(str, start + 2 * c));
    }
    return chars.join('');
  }
  // Return the start index of every non-overlapping occurrence of signature.
  function findAll(str, signature) {
    var found = [];
    var at = str.indexOf(signature);
    while (at !== -1) {
      found.push(at);
      at = str.indexOf(signature, at + signature.length);
    }
    return found;
  }

  var file = File.openDialog("Please select PSD file.","PSD File:*.psd");
  // openDialog returns null when the user cancels the picker.
  if (!file || !file.exists) return;
  if (!file.open("r")) {
    alert("Unable to open " + decodeURI(file.name));
    return;
  }
  file.encoding = 'BINARY';
  var PSD_data = file.read();
  file.close();

  var NAME_SIGNATURE = '8BIMluni';
  var ID_SIGNATURE = '8BIMlyid';
  var name_arr = [];
  var ID_arr = [];

  var nameHits = findAll(PSD_data, NAME_SIGNATURE);
  for (var i = 0; i < nameHits.length; i++) {
    // After the 8-byte signature, 4 bytes are skipped (the block's length
    // field per the PSD spec); the character count and characters follow.
    name_arr.push(readUnicodeString(PSD_data, nameHits[i] + NAME_SIGNATURE.length + 4));
  }

  var idHits = findAll(PSD_data, ID_SIGNATURE);
  for (var n = 0; n < idHits.length; n++) {
    // 8 bytes of signature + 4 skipped bytes, then the 4-byte ID itself.
    ID_arr.push(readWord(PSD_data, idHits[n] + 12));
  }

  // Pair entries in the reverse of the order they were found in the file.
  name_arr.reverse();
  ID_arr.reverse();

  var PSDlayersArr = [];
  for (var k = 0; k < name_arr.length; k++) {
    PSDlayersArr.push([name_arr[k], ID_arr[k]]);
  }

  alert(PSDlayersArr);
}
 
main();
 

Paul MR

The Script Master
Messages
374
Likes
203
Hi Ian, here the code is slightly changed...
Code:
/**
 * Asks the user for a PSD file, scans its raw bytes for "8BIMluni"
 * (Unicode layer name) and "8BIMlyid" (layer ID) blocks, builds an array
 * of [name, ID] pairs and shows them, one pair per line, with alert().
 *
 * Names and IDs are collected independently and paired purely by position,
 * so the pairing is only right when every name block has a matching ID
 * block in the same order. Both lists are reversed so the result reads
 * like the Photoshop layer panel (top down), and entries whose name
 * contains "</Layer group" (the end of a layer set) are dropped.
 *
 * ES3 syntax is used on purpose so the script keeps running in ExtendScript.
 */
function main() {
  // Read one byte at offset (NaN when the offset is outside the data).
  function readByte(str, ofs) {
    return str.charCodeAt(ofs);
  }
  // Read two bytes, big-endian. Multiplication instead of << keeps NaN
  // visible for out-of-range reads.
  function readInt16(str, ofs) {
    return readByte(str, ofs) * 0x100 + readByte(str, ofs + 1);
  }
  // Read four bytes, big-endian, as an unsigned value (<< 16 would turn
  // values of 0x80000000 and above into negative numbers).
  function readWord(str, ofs) {
    return readInt16(str, ofs) * 0x10000 + readInt16(str, ofs + 2);
  }
  // Decode one two-byte character at offset.
  function readUnicodeChar(str, ofs) {
    return String.fromCharCode(readInt16(str, ofs));
  }
  // Decode a string stored as a 4-byte character count followed by
  // two-byte characters. The count is clamped to the data that is really
  // there, so a truncated file or a chance signature match cannot make
  // the loop run far past the end of the data.
  function readUnicodeString(str, ofs) {
    var count = readWord(str, ofs);
    var start = ofs + 4;
    var available = Math.floor((str.length - start) / 2);
    if (isNaN(count) || available < 0) {
      count = 0;
    } else if (count > available) {
      count = available;
    }
    var chars = [];
    for (var c = 0; c < count; c++) {
      chars.push(readUnicodeChar(str, start + 2 * c));
    }
    return chars.join('');
  }
  // Return the start index of every non-overlapping occurrence of signature.
  function findAll(str, signature) {
    var found = [];
    var at = str.indexOf(signature);
    while (at !== -1) {
      found.push(at);
      at = str.indexOf(signature, at + signature.length);
    }
    return found;
  }

  var file = File.openDialog("Please select PSD file.","PSD File:*.psd");
  // openDialog returns null when the user cancels the picker.
  if (!file || !file.exists) return;
  if (!file.open("r")) {
    alert("Unable to open " + decodeURI(file.name));
    return;
  }
  file.encoding = 'BINARY';
  var PSD_data = file.read();
  file.close();

  var NAME_SIGNATURE = '8BIMluni';
  var ID_SIGNATURE = '8BIMlyid';
  var name_arr = [];
  var ID_arr = [];

  var nameHits = findAll(PSD_data, NAME_SIGNATURE);
  for (var i = 0; i < nameHits.length; i++) {
    // After the 8-byte signature, 4 bytes are skipped (the block's length
    // field per the PSD spec); the character count and characters follow.
    name_arr.push(readUnicodeString(PSD_data, nameHits[i] + NAME_SIGNATURE.length + 4));
  }

  var idHits = findAll(PSD_data, ID_SIGNATURE);
  for (var n = 0; n < idHits.length; n++) {
    // 8 bytes of signature + 4 skipped bytes, then the 4-byte ID itself.
    ID_arr.push(readWord(PSD_data, idHits[n] + 12));
  }

  // Reverse arrays so that the result reads Photoshop layers top down.
  name_arr.reverse();
  ID_arr.reverse();

  var PSDlayersArr = [];
  for (var k = 0; k < name_arr.length; k++) {
    // Remove layer set end markers. The index still advances, so the
    // remaining names stay lined up with their IDs.
    if (name_arr[k].match(/<\/Layer group/)) continue;
    // If layer groups have "group" in their name you could remove them here:
    // if (name_arr[k].match(/group/i)) continue;
    PSDlayersArr.push([name_arr[k], ID_arr[k]]);
  }

  alert(PSDlayersArr.join("\n"));
}
 
main();
What I normally do is look at the spec and use an hex editor to look at the file, then you can see how it is formatted.
Most of the fields start with "8BIM", so they are easy to spot. The RegExp ('8BIMxxxx','g') will find all of their positions in the file and give you a place to start looking.
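The position found by the RegExp is the start of the "8BIMxxxx" text, so you need to skip those 8 bytes, plus the 4-byte length field that follows them, to get to the data for that field (which is why the layer ID is read at index + 12).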
If your layerset names start with Group you could remove them as above, I have amended the code so it removes the layerset ends.

Hope it's a start for you.
Good luck with your project!
 

ianpinder

New Member
Messages
4
Likes
0
Thanks so much for this. I managed to get it working by checking for a field that only exists on group start and end layers ("8BIMlsct"), and then searching back from that to get the related layer ID. I then put these into an array to check against, to allow any group items to be removed from the main array. I used a fair bit of trial and error to find the right number of bytes to adjust it by, and as you suggested I looked through the spec and a hex editor, and managed to at least find the other fields.

The thing that still baffles me a bit is where the number 12 comes from in this line:

readWord(PSD_data, findResult.index + 12)

Should I be able to simply count in the plain-text view of the hex editor? I will have to do a bit more research on this. I obviously need a better foundation of knowledge to build upon. :)

Anyway, the checking with a loop inside a loop probably isn't the most efficient way of doing that, but it seems fairly robust after a few tests.

Can't thank you enough for all your help. I would never have got this working without it.

Kind regards,
- Ian




JavaScript:
/**
 * Asks the user for a PSD file, scans its raw bytes for "8BIMluni"
 * (Unicode layer name), "8BIMlyid" (layer ID) and "8BIMlsct" blocks,
 * builds an array of [name, ID] pairs with group entries removed and
 * shows them, one pair per line, with alert().
 *
 * Names and IDs are collected independently and paired purely by position,
 * so the pairing is only right when every name block has a matching ID
 * block in the same order. Both lists are reversed so the result reads
 * like the Photoshop layer panel (top down).
 *
 * ES3 syntax is used on purpose so the script keeps running in ExtendScript.
 */
function main() {
    // Read one byte at offset (NaN when the offset is outside the data).
    function readByte(str, ofs) {
        return str.charCodeAt(ofs);
    }
    // Read two bytes, big-endian. Multiplication instead of << keeps NaN
    // visible for out-of-range reads.
    function readInt16(str, ofs) {
        return readByte(str, ofs) * 0x100 + readByte(str, ofs + 1);
    }
    // Read four bytes, big-endian, as an unsigned value (<< 16 would turn
    // values of 0x80000000 and above into negative numbers).
    function readWord(str, ofs) {
        return readInt16(str, ofs) * 0x10000 + readInt16(str, ofs + 2);
    }
    // Decode one two-byte character at offset.
    function readUnicodeChar(str, ofs) {
        return String.fromCharCode(readInt16(str, ofs));
    }
    // Decode a string stored as a 4-byte character count followed by
    // two-byte characters. The count is clamped to the data that is really
    // there, so a truncated file or a chance signature match cannot make
    // the loop run far past the end of the data.
    function readUnicodeString(str, ofs) {
        var count = readWord(str, ofs);
        var start = ofs + 4;
        var available = Math.floor((str.length - start) / 2);
        if (isNaN(count) || available < 0) {
            count = 0;
        } else if (count > available) {
            count = available;
        }
        var chars = [];
        for (var c = 0; c < count; c++) {
            chars.push(readUnicodeChar(str, start + 2 * c));
        }
        return chars.join('');
    }
    // Return the start index of every non-overlapping occurrence of signature.
    function findAll(str, signature) {
        var found = [];
        var at = str.indexOf(signature);
        while (at !== -1) {
            found.push(at);
            at = str.indexOf(signature, at + signature.length);
        }
        return found;
    }

    var file = File.openDialog("Please select PSD file.","PSD File:*.psd");
    // openDialog returns null when the user cancels the picker.
    if (!file || !file.exists) return;
    if (!file.open("r")) {
        alert("Unable to open " + decodeURI(file.name));
        return;
    }
    file.encoding = 'BINARY';
    var PSD_data = file.read();
    file.close();

    var NAME_SIGNATURE = '8BIMluni';
    var ID_SIGNATURE = '8BIMlyid';
    var GROUP_SIGNATURE = '8BIMlsct';
    var name_arr = [];
    var ID_arr = [];

    var nameHits = findAll(PSD_data, NAME_SIGNATURE);
    for (var i = 0; i < nameHits.length; i++) {
        // After the 8-byte signature, 4 bytes are skipped (the block's length
        // field per the PSD spec); the character count and characters follow.
        name_arr.push(readUnicodeString(PSD_data, nameHits[i] + NAME_SIGNATURE.length + 4));
    }

    var idHits = findAll(PSD_data, ID_SIGNATURE);
    for (var n = 0; n < idHits.length; n++) {
        // 8 bytes of signature + 4 skipped bytes, then the 4-byte ID itself.
        ID_arr.push(readWord(PSD_data, idHits[n] + 12));
    }

    // Use the lsct search to find items that are a group start or end.
    // The ID is taken from the 4 bytes directly in front of the lsct
    // signature, which assumes the layer ID block sits immediately before
    // it -- NOTE(review): found by trial and error, confirm against the spec.
    // IDs are kept as object keys so each layer needs only one lookup.
    var groupIds = {};
    var groupHits = findAll(PSD_data, GROUP_SIGNATURE);
    for (var g = 0; g < groupHits.length; g++) {
        var groupId = readWord(PSD_data, groupHits[g] - 4);
        if (!isNaN(groupId)) {
            groupIds[groupId] = true;
        }
    }

    // Reverse arrays so that the result reads Photoshop layers top down.
    name_arr.reverse();
    ID_arr.reverse();

    // Create array of layer names and IDs, leaving out anything whose ID
    // is in the list of group IDs.
    var PSDlayersArr = [];
    for (var k = 0; k < name_arr.length; k++) {
        if (groupIds[ID_arr[k]] === true) continue;
        PSDlayersArr.push([name_arr[k], ID_arr[k]]);
    }

    alert(PSDlayersArr.join("\n"));
}
  
  main();
 

Top