Filter SDK/avs2pcm

From Avisynth wiki
(Difference between revisions)
Jump to: navigation, search
(Line by line breakdown)
Line 186: Line 186:
 
     AVSValue res = env->Invoke("Import", AVSValue(&arg, 1));
 
     AVSValue res = env->Invoke("Import", AVSValue(&arg, 1));
 
     if (!res.IsClip()) {
 
     if (!res.IsClip()) {
       fprintf(stderr, "Error: '%s' didn't return a video clip.\n", infile);
+
       fprintf(stderr, "Error: '%s' didn't return a clip.\n", infile);
 
       FreeLibrary(avsdll);
 
       FreeLibrary(avsdll);
 
       return 1;
 
       return 1;
Line 201: Line 201:
 
     VideoInfo vi = clip->GetVideoInfo();
 
     VideoInfo vi = clip->GetVideoInfo();
  
The lines above are explained in [[Filter_SDK/avs2yuv|avs2yuv]], so won't be repeated here.
+
The lines above are explained in [[Filter_SDK/avs2yuv|avs2yuv]], so they won't be repeated here.
  
 
     if (!vi.HasAudio()) {
 
     if (!vi.HasAudio()) {
Line 209: Line 209:
 
     }
 
     }
  
Returns an error if the clip doesn't contain audio (in case it contains only video for example).  
+
Returns an error if the clip doesn't contain audio.  
  
 
     fprintf(stderr, " %s:\n", infile);
 
     fprintf(stderr, " %s:\n", infile);
Line 257: Line 257:
 
     __int64 bytes = vi.BytesFromAudioSamples(count);
 
     __int64 bytes = vi.BytesFromAudioSamples(count);
  
We will use [http://www.cplusplus.com/reference/cstdio/fwrite/ fwrite] to write 'count' audio samples to a file. So we will need to know the corresponding number of bytes which needs to be written. [[Cplusplus_API/VideoInfo|BytesFromAudioSamples]] gives the number of bytes and it is internally calculated as follows:
+
We will use [http://www.cplusplus.com/reference/cstdio/fwrite/ fwrite] to write 'count' audio samples to a file. So we will need to know the corresponding number of bytes which needs to be written. [[Cplusplus_API/VideoInfo|BytesFromAudioSamples]] gives the number of bytes and it is calculated internally as follows:
  
 
{| style="height:100px" border="1" cellpadding="4"
 
{| style="height:100px" border="1" cellpadding="4"
Line 264: Line 264:
 
|-  
 
|-  
 
| BytesPerChannelSample()
 
| BytesPerChannelSample()
| sizeof(unsigned char)
+
| = sizeof(unsigned char) = 1 byte [for 8 bit audio], <br>
= 8 bits = 1 byte [for 8 bit audio], <br>
+
= sizeof(signed short) = 2 bytes [for 16 bit audio], <br>
= sizeof(signed short) = 16 bits = 2 bytes [for 16 bit audio], <br>
+
= 3 bytes [for 24 bit audio], <br>
= 3 [for 24 bit audio] = 24 bits = 3 bytes, <br>
+
= sizeof(signed int) = 4 bytes [for 32 bit audio], <br>
= sizeof(signed int) = 32 bits = 4 bytes [for 32 bit audio], <br>
+
= sizeof(SFLOAT) = 4 bytes [for float audio; this is also 32 bit audio]
= sizeof(SFLOAT) = 32 bits = 4 bytes [for float audio]
+
 
|-  
 
|-  
 
| BytesPerAudioSample()
 
| BytesPerAudioSample()
Line 290: Line 289:
 
  ConvertAudioTox() // x = 8Bit, 16Bit, 24Bit, 32Bit and Float
 
  ConvertAudioTox() // x = 8Bit, 16Bit, 24Bit, 32Bit and Float
  
The samples are always written as little endian. So this means value = LSB MSB (the least significant byte first followed by the most significant byte).
+
The samples are always written to the pcm file as little endian. So this means value = LSB ... MSB (the least significant byte first and the most significant byte last).
  
x type
+
{| style="height:100px" border="1" cellpadding="4"
8 value: samples[0] = 255; samples[x] = 0
+
!width=20%| type (x)
16 value: samples[1]*256+samples[0] = 65535; samples[x+1]*256+samples[x] = 0
+
!width=40%| value of samples (abbreviated as s)
24 no separate type (samples between 0 and 2^24-1) ..
+
!width=40%| bytes written in file
32 unsigned int (samples between 0 and 2^32-1) FF FF FF FF .. 00 00 00 00 ..
+
|-
float (samples between -1.00000 and 1.000000) 1.000000 = 00 00 80 3F; -1.000000 = 00 00 80 BF
+
| 8Bit
 +
| s[0] = 255; s[end-1] = 0
 +
| FF .. 00 ..
 +
|-
 +
| 16Bit
 +
| s[1]*256+s[0] = 127*256+255 = 32767; s[end-1]*256+s[end-2] = 128*256+0 = 32768 (= -32768 as a signed 16-bit value)
 +
| FF 7F .. 00 80 ..
 +
|-
 +
| 24Bit
 +
| s[2]*16^4+s[1]*16^2+s[0] = 16777215; s[end-1]*16^4+s[end-2]*16^2+s[end-3] = 0
 +
| FF FF FF .. 00 00 00 ..
 +
|-
 +
| 32Bit
 +
| s[3]*16^6+s[2]*16^4+s[1]*16^2+s[0] = 4294967295; s[end-1]*16^6+s[end-2]*16^4+s[end-3]*16^2+s[end-4] = 0
 +
| FF FF FF FF .. 00 00 00 00 ..
 +
|-
 +
| float <wrong>
 +
| s[3]*16^6+s[2]*16^4+s[1]*16^2+s[0] = 4294967295; s[end-1]*16^6+s[end-2]*16^4+s[end-3]*16^2+s[end-4] = 0
 +
| (samples between -1.00000 and 1.000000) 1.000000 = 00 00 80 3F; -1.000000 = 00 00 80 BF
 +
|}
  
# alternative
+
Above, the samples are declared as unsigned char (regardless of the sample bit depth) but filled by GetAudio as shown, meaning that one audio sample is spread across multiple elements of 's'. This is the simplest and cleanest way to do it, but you could also have done the following instead:
 +
 
 +
int BlockAlign = vi.BytesPerAudioSample();
 +
 +
switch (vi.SampleType()) {
 +
case SAMPLE_INT8 : {
 +
  unsigned char* samples = new unsigned char[BlockAlign*count];
 +
  clip->GetAudio(samples, start, count, env);
 +
  fwrite(samples, bytes, 1, out_fh);
 +
  delete[] samples;
 +
  break;
 +
}
 +
case SAMPLE_INT16 : {
 +
  signed short* samples = new signed short[channels*count];
 +
  clip->GetAudio(samples, start, count, env);
 +
  fwrite(samples, bytes, 1, out_fh);
 +
  delete[] samples;
 +
  break;
 +
}
 +
case SAMPLE_INT24 : {
 +
  unsigned char* samples = new unsigned char[3*channels*count];
 +
  clip->GetAudio(samples, start, count, env);
 +
  fwrite(samples, bytes, 1, out_fh);
 +
  delete[] samples;
 +
  break;
 +
}
 +
case SAMPLE_INT32 : {
 +
  signed int* samples = new signed int[channels*count];
 +
  clip->GetAudio(samples, start, count, env);
 +
  fwrite(samples, bytes, 1, out_fh);
 +
  delete[] samples;
 +
  break;
 +
}
 +
case SAMPLE_FLOAT : {
 +
  SFLOAT* samples = new SFLOAT[channels*count];
 +
  clip->GetAudio(samples, start, count, env);
 +
  fwrite(samples, bytes, 1, out_fh);
 +
  delete[] samples;
 +
  break;
 +
}
 +
}
  
x type y1 y2
+
It will produce the same output.
8 unsigned char (samples between 0 and 255) FF .. 00 ..
+
16 signed short (samples between -32767 and 32767) 32767 = 0x7FFF [written as 0xFF7F] -32767 = 65535 - 32767 = 32768 = 0x8000  [written as 0x0080]
+
24 no separate type (samples between 0 and 2^24-1) ...
+
32 unsigned int (samples between 0 and 2^32-1) FF FF FF FF .. 00 00 00 00 ..
+
float (samples between -1.00000 and 1.000000) 1.000000 = 00 00 80 3F; -1.000000 = 00 00 80 BF
+
  
 
     env->DeleteScriptEnvironment();
 
     env->DeleteScriptEnvironment();

Revision as of 15:31, 23 June 2014

avs2pcm reads a script and outputs raw audio (LPCM, that is, linear PCM). The byte order will be little endian, the signedness (signed or unsigned) will depend on the bit depth, and the channels will be interleaved.

Here's avs2pcm.cpp:

#include <stdio.h>
#include <Windows.h>
#include "avisynth.h"

#define MY_VERSION "Avs2PCM 0.01"

const AVS_Linkage *AVS_linkage = 0;

int __cdecl main(int argc, const char* argv[])
{
  const char* infile = NULL;
  const char* outfile = NULL;
  FILE* out_fh;

  if (!strcmp(argv[1], "-h")) {
    fprintf(stderr, MY_VERSION "\n"
    "Usage: avs2pcm in.avs out.pcm\n");
    return 2;
  } else {
    infile = argv[1];
    outfile = argv[2];
  }

  try {
    char* sample_type;
    typedef IScriptEnvironment* (__stdcall *DLLFUNC)(int);
    IScriptEnvironment* env;
    HMODULE avsdll = LoadLibrary("avisynth.dll");
    if (!avsdll) {
      fprintf(stderr, "failed to load avisynth.dll\n");
      return 2;
    }

    DLLFUNC CreateEnv = (DLLFUNC)GetProcAddress(avsdll, "CreateScriptEnvironment");
    if (!CreateEnv) {
      fprintf(stderr, "failed to load CreateScriptEnvironment()\n");
      FreeLibrary(avsdll);
      return 1;
    }

    env = CreateEnv(AVISYNTH_INTERFACE_VERSION);
    AVS_linkage = env->GetAVSLinkage();
    AVSValue arg(infile);
    AVSValue res = env->Invoke("Import", AVSValue(&arg, 1));
    if (!res.IsClip()) {
      fprintf(stderr, "Error: '%s' didn't return a video clip.\n", infile);
      FreeLibrary(avsdll);
      return 1;
    }

    PClip clip = res.AsClip();

    if (clip->GetVersion() < 5) {
      fprintf(stderr, "Error: too old version ('%d') of avisynth.dll loaded.\nplease install v2.60 or later.\n",
              clip->GetVersion());
      return 1;
    }

    VideoInfo vi = clip->GetVideoInfo();

    if (!vi.HasAudio()) {
      fprintf(stderr, "Error: '%s' video only clip.\n", infile);
      FreeLibrary(avsdll);
      return 1;
    }

    fprintf(stderr, " %s:\n", infile);
    fprintf(stderr, " %d Herz,\n", vi.audio_samples_per_second);
    fprintf(stderr, " %d channels,\n", vi.nchannels);
    fprintf(stderr, " %I64d audio samples,\n", vi.num_audio_samples);

    switch(vi.SampleType()) {
    case SAMPLE_INT8 : sample_type = "8 bit";
      break;
    case SAMPLE_INT16 : sample_type = "16 bit";
      break;
    case SAMPLE_INT24 : sample_type = "24 bit";
      break;
    case SAMPLE_INT32 :
    case SAMPLE_FLOAT : sample_type = "32 bit";
      break;
    default: sample_type = "unknown sample type";
      break;
    }

    fprintf(stderr, " %s", sample_type);

    out_fh = fopen(outfile, "wb");
    if (!out_fh) {
      fprintf(stderr, "fopen(\"%s\") failed", outfile);
      FreeLibrary(avsdll);
      return 1;
    }

    const __int64 start = 0;
    const __int64 count = vi.num_audio_samples;
    const int channels = vi.AudioChannels();
    __int64 bytes = vi.BytesFromAudioSamples(count);
    int BlockAlign = vi.AudioChannels() * vi.BytesPerAudioSample();

    unsigned char* samples = new unsigned char[BlockAlign*count];
    clip->GetAudio(samples, start, count, env);
    fwrite(samples, bytes, 1, out_fh);

    delete[] samples;
    env->DeleteScriptEnvironment();
    FreeLibrary(avsdll);
    AVS_linkage = 0;

  } catch(AvisynthError err) {
    fprintf(stderr, "\nAvisynth error:\n%s\n", err.msg);
    return 1;
  }

  fclose(out_fh);
  return 0;
}

Compile this file into an EXE named avs2pcm.exe. See compiling instructions. Now open the command line and go to the folder where avs2pcm.exe and your script (called example.avs here) are located. Our script:

Tone(length=1, frequency=2, samplerate=48000, channels=1, type="square", level=1.0) # float
ConvertAudioTo16Bit()

Type the following on the command line (the name of the output clip can be arbitrary in our application):

avs2pcm.exe example.avs output.pcm

So the output file will contain 48000 samples of 16-bit data (at 48 kHz, one channel). You can import it in AviSynth using the plugin NicAudio:

v = Blankclip(1000)
a = RaWavSource("D:\AviSynth\Plugins\avs2pcm\output.pcm", 48000, 16, 1) # little-endian
Audiodub(v,a).ConvertAudioTo16Bit().GetChannels(1) # Audiograph doesn't support 24/32bit nor multichannel
Audiograph(20)

Line by line breakdown

Here's a line-by-line breakdown of avs2pcm.cpp:

#include <stdio.h>
#include <Windows.h>
#include "avisynth.h"

#define MY_VERSION "Avs2PCM 0.01"

const AVS_Linkage *AVS_linkage = 0;

int __cdecl main(int argc, const char* argv[])
{
  const char* infile = NULL;
  const char* outfile = NULL;
  FILE* out_fh;

  if (!strcmp(argv[1], "-h")) {
    fprintf(stderr, MY_VERSION "\n"
    "Usage: avs2pcm in.avs out.pcm\n");
    return 2;
  } else {
    infile = argv[1];
    outfile = argv[2];
  }

  try {
    char* sample_type;
    typedef IScriptEnvironment* (__stdcall *DLLFUNC)(int);
    IScriptEnvironment* env;
    HMODULE avsdll = LoadLibrary("avisynth.dll");
    if (!avsdll) {
      fprintf(stderr, "failed to load avisynth.dll\n");
      return 2;
    }

    DLLFUNC CreateEnv = (DLLFUNC)GetProcAddress(avsdll, "CreateScriptEnvironment");
    if (!CreateEnv) {
      fprintf(stderr, "failed to load CreateScriptEnvironment()\n");
      FreeLibrary(avsdll);
      return 1;
    }

    env = CreateEnv(AVISYNTH_INTERFACE_VERSION);
    AVS_linkage = env->GetAVSLinkage();
    AVSValue arg(infile);
    AVSValue res = env->Invoke("Import", AVSValue(&arg, 1));
    if (!res.IsClip()) {
      fprintf(stderr, "Error: '%s' didn't return a clip.\n", infile);
      FreeLibrary(avsdll);
      return 1;
    }

    PClip clip = res.AsClip();

    if (clip->GetVersion() < 5) {
      fprintf(stderr, "Error: too old version ('%d') of avisynth.dll loaded.\nplease install v2.60 or later.\n",
              clip->GetVersion());
      return 1;
    }

    VideoInfo vi = clip->GetVideoInfo();

The lines above are explained in avs2yuv, so they won't be repeated here.

    if (!vi.HasAudio()) {
      fprintf(stderr, "Error: '%s' video only clip.\n", infile);
      FreeLibrary(avsdll);
      return 1;
    }

Returns an error if the clip doesn't contain audio.

    fprintf(stderr, " %s:\n", infile);
    fprintf(stderr, " %d Herz,\n", vi.audio_samples_per_second);
    fprintf(stderr, " %d channels,\n", vi.nchannels);
    fprintf(stderr, " %I64d audio samples,\n", vi.num_audio_samples);

    switch(vi.SampleType()) {
    case SAMPLE_INT8 : sample_type = "8 bit";
      break;
    case SAMPLE_INT16 : sample_type = "16 bit";
      break;
    case SAMPLE_INT24 : sample_type = "24 bit";
      break;
    case SAMPLE_INT32 :
    case SAMPLE_FLOAT : sample_type = "32 bit";
      break;
    default: sample_type = "unknown sample type";
      break;
    }

    fprintf(stderr, " %s", sample_type);

Some information about the clip is written to the console.

    out_fh = fopen(outfile, "wb");

Creates an empty binary file and opens it for writing. It returns a file pointer, called 'out_fh' here. Note that 'wb' means write mode and binary.

    if (!out_fh) {
      fprintf(stderr, "fopen(\"%s\") failed", outfile);
      FreeLibrary(avsdll);
      return 1;
    }

If fopen fails (that is, when out_fh is NULL), an error is written to the console and the program exits.

    const __int64 start = 0;
    const __int64 count = vi.num_audio_samples;

This gives the number of audio samples in our stream.

    const int channels = vi.AudioChannels();

This gives the number of audio channels of our stream.

    __int64 bytes = vi.BytesFromAudioSamples(count);

We will use fwrite to write 'count' audio samples to a file. So we will need to know the corresponding number of bytes that need to be written. BytesFromAudioSamples gives the number of bytes and it is calculated internally as follows:

function value
BytesPerChannelSample() = sizeof(unsigned char) = 1 byte [for 8 bit audio],

= sizeof(signed short) = 2 bytes [for 16 bit audio],
= 3 bytes [for 24 bit audio],
= sizeof(signed int) = 4 bytes [for 32 bit audio],
= sizeof(SFLOAT) = 4 bytes [for float audio; this is also 32 bit audio]

BytesPerAudioSample() AudioChannels() * BytesPerChannelSample()
BytesFromAudioSamples() num_audio_samples * BytesPerAudioSample()
    int BlockAlign = vi.BytesPerAudioSample();

    unsigned char* samples = new unsigned char[BlockAlign*count];
    clip->GetAudio(samples, start, count, env);
    fwrite(samples, bytes, 1, out_fh);
    delete[] samples;

There are a few ways to write audio to a file. The simplest one is shown above. Let's look at what happens with our data:

Tone(length=1, frequency=2, samplerate=48000, channels=1, type="square", level=1.0) # float
ConvertAudioTox() // x = 8Bit, 16Bit, 24Bit, 32Bit and Float

The samples are always written to the pcm file as little endian. So this means value = LSB ... MSB (the least significant byte first and the most significant byte last).

type (x) value of samples (abbreviated as s) bytes written in file
8Bit s[0] = 255; s[end-1] = 0 FF .. 00 ..
16Bit s[1]*256+s[0] = 127*256+255 = 32767; s[end-1]*256+s[end-2] = 128*256+0 = 32768 (= -32768 as a signed 16-bit value) FF 7F .. 00 80 ..
24Bit s[2]*16^4+s[1]*16^2+s[0] = 16777215; s[end-1]*16^4+s[end-2]*16^2+s[end-3] = 0 FF FF FF .. 00 00 00 ..
32Bit s[3]*16^6+s[2]*16^4+s[1]*16^2+s[0] = 4294967295; s[end-1]*16^6+s[end-2]*16^4+s[end-3]*16^2+s[end-4] = 0 FF FF FF FF .. 00 00 00 00 ..
float <wrong> s[3]*16^6+s[2]*16^4+s[1]*16^2+s[0] = 4294967295; s[end-1]*16^6+s[end-2]*16^4+s[end-3]*16^2+s[end-4] = 0 (samples between -1.00000 and 1.000000) 1.000000 = 00 00 80 3F; -1.000000 = 00 00 80 BF

Above, the samples are declared as unsigned char (regardless of the sample bit depth) but filled by GetAudio as shown, meaning that one audio sample is spread across multiple elements of 's'. This is the simplest and cleanest way to do it, but you could also have done the following instead:

int BlockAlign = vi.BytesPerAudioSample();

switch (vi.SampleType()) {
case SAMPLE_INT8 : {
  unsigned char* samples = new unsigned char[BlockAlign*count];
  clip->GetAudio(samples, start, count, env);
  fwrite(samples, bytes, 1, out_fh);
  delete[] samples;
  break;
}
case SAMPLE_INT16 : {
  signed short* samples = new signed short[channels*count];
  clip->GetAudio(samples, start, count, env);
  fwrite(samples, bytes, 1, out_fh);
  delete[] samples;
  break;
}
case SAMPLE_INT24 : {
  unsigned char* samples = new unsigned char[3*channels*count];
  clip->GetAudio(samples, start, count, env);
  fwrite(samples, bytes, 1, out_fh);
  delete[] samples;
  break;
}
case SAMPLE_INT32 : {
  signed int* samples = new signed int[channels*count];
  clip->GetAudio(samples, start, count, env);
  fwrite(samples, bytes, 1, out_fh);
  delete[] samples;
  break;
}
case SAMPLE_FLOAT : {
  SFLOAT* samples = new SFLOAT[channels*count];
  clip->GetAudio(samples, start, count, env);
  fwrite(samples, bytes, 1, out_fh);
  delete[] samples;
  break;
}
}

It will produce the same output.

    env->DeleteScriptEnvironment();
    FreeLibrary(avsdll);
    AVS_linkage = 0;

    } catch(AvisynthError err) {
      fprintf(stderr, "\nAvisynth error:\n%s\n", err.msg);
      return 1;
    }

    fclose(out_fh);
    return 0;
}
Personal tools