//============================================================================
//
//  project:    freedb
//  file:       analyze-archive.c
//  author:     Andrew Smith
//  date:       2005-01-01
//  language:   C
//
//  NOTES
//
//  FreeDB music CD database tarball conversion and analysis.
//
//  Copyright 2005, Andrew Smith <freedb@asmith.id.au>
//
//
//----------------------------------------------------------------------------
//
//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published
//  by the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful, but
//  WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software Foundation,
//  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//
//============================================================================

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

// Byte widths of the fields of a tar header record.
// Each constant is used as the array length of the matching member of
// tTarRec; together they add up to 500 bytes.
enum tTarSizes
{
    kMaxName = 100,
    kMaxMode = 8,
    kMaxUid = 8,
    kMaxGid = 8,
    kMaxSize = 12,
    kMaxTime = 12,
    kMaxChkSum = 8,
    kMaxTypeFlag = 1,
    kMaxLinkName = 100,
    kMaxMagic = 6,
    kMaxVersion = 2,
    kMaxUName = 32,
    kMaxGName = 32,
    kMaxDevMajor = 8,
    kMaxDevMinor = 8,
    kMaxPrefix = 155
};

// Values of the tar header type flag byte (tTarRec.fTypeFlag).
// GetFile() reads the contents of kARegType, kRegType and kContType entries,
// reports kLnkType and kSymType entries as links, ignores kDirType entries
// and aborts on anything else.
enum tTarTypes
{
    kARegType = 0,      // a NUL byte, handled the same way as kRegType
    kRegType = '0',
    kLnkType = '1',
    kSymType = '2',
    kChrType = '3',
    kBlkType = '4',
    kDirType = '5',
    kFifoType = '6',
    kContType = '7'
};

// One tar header block as read from the archive.
// Every member is a char or char array, so the named fields (500 bytes) plus
// the 12 bytes of filler make up a 512 byte record.
// Only fName, fSize, fTime, fTypeFlag and fLinkName are read by the code in
// this file. The fields are not guaranteed to be NUL terminated.
typedef struct
{
    char fName[kMaxName];
    char fMode[kMaxMode];
    char fUid[kMaxUid];
    char fGid[kMaxGid];
    char fSize[kMaxSize];       // octal digits, decoded by GetOctal()
    char fTime[kMaxTime];       // octal digits, decoded by GetOctal()
    char fChkSum[kMaxChkSum];
    char fTypeFlag;             // one of enum tTarTypes
    char fLinkName[kMaxLinkName];
    char fMagic[kMaxMagic];
    char fVersion[kMaxVersion];
    char fUName[kMaxUName];
    char fGName[kMaxGName];
    char fDevMajor[kMaxDevMajor];
    char fDevMinor[kMaxDevMinor];
    char fPrefix[kMaxPrefix];
    char fFiller[12];           // pads the record out to the block size
} tTarRec;

// size of one archive block: the size of a header record
#define kBlockSize sizeof(tTarRec)

// size and time of the current archive entry, decoded from its header
static int gFileSize,gFileTime;

// header of the current archive entry
static tTarRec gTar;


//----------------------------------------------------------------------------

// capacity of the input and output buffers: 2048 archive blocks
#define kMaxBuffer (2048 * kBlockSize)
// multiplier used to convert a disc length in seconds into frames
#define kFramesPerSecond 75

enum tBoolean {FALSE, TRUE};

// Indexes into gFields, one per kind of text collected from a file.
// kDArtist and kTArtist are never stored by the code in this file; artists
// are split out of the title fields when records are output.
enum tFields
{
    kLength, kRevision, kProcessor, kSubmitter,
    kDiscId, kDArtist, kDTitle, kDYear, kDGenre, kExtD,
    kFrame, kTArtist, kTTitle, kExtT,
    kMaxField
};

// Error codes, passed to HandleError() and used as the process exit status.
enum tErrors
{
    kLineError = 1,     // end of line where one is not tolerated
    kFileError,         // NUL (end of the file contents) reached while parsing
    kArchiveError,      // unsupported archive entry type
    kLengthError,       // short read from the archive
    kFieldError,        // unrecognised or malformed field name
    kBufferError,       // file too big for gInput, or gBuffer full
    kCommentError       // not raised by the code in this file
};

// How much premature input ending the caller accepts. The values are
// ordered: GetNext() rejects a newline below kEolOk, GetBlocks() rejects a
// short read below kEofOk.
enum tTolerance {kNeeded, kEolOk, kEofOk};

// gBuffer collects the escaped field text, gInput holds the raw file contents
static char gBuffer[kMaxBuffer], gInput[kMaxBuffer];
// gNext: position in gBuffer of the pending (not yet committed) character
// gFrom: position in gInput of the next character to read
// gFields: start of each field's text within gBuffer, or NULL if not seen
static char *gNext, *gFrom, *gFields[kMaxField];
// gField: field most recently started
// gTrack: track number most recently seen, -1 when none
// gCount: number of archive entries started so far
static int  gField, gTrack, gCount = 0;


//----------------------------------------------------------------------------
// reset the parser state ready for the next archive entry
// - rewinds the read and write positions to the start of their buffers
// - forgets every field and the last track number
// - counts the entry
// gField is left equal to kMaxField, which matches no real field

static void
Initialise(void)
{
    gCount += 1;
    gTrack = -1;
    gFrom = gInput;
    gNext = gBuffer;

    gField = 0;
    while (gField != kMaxField)
    {
        gFields[gField] = NULL;
        gField++;
    }
}


//----------------------------------------------------------------------------
// map a category directory name to a letter of the alphabet
//
// aDirP points at a path beginning with the category directory. Only the
// first character (and the second for 'c' and 'r') is examined.
//
// Returns 'A'..'K' for the known categories and 'L' for anything else.
// The function used to fall off its end for an unknown directory, which
// handed callers an undefined value that they then used as a table index.
//
// gCatLength is indexed by (category - 'A') and gives the length of the
// directory prefix including the slash. The final entry belongs to the
// unknown category 'L' and is 0, so the lookup stays in bounds.

static int gCatLength[] = {6,10,8,5,5,5,5,7,7,5,11,0};

static char
GetCategory(const char *aDirP)
{
    switch (*aDirP)
    {
        case 'b': // blues=A
            return 'A';

        case 'c': // classical=B, country=C
            return *(aDirP+1) == 'l' ? 'B' : 'C';

        case 'd': // data=D
            return 'D';

        case 'f': // folk=E
            return 'E';

        case 'j': // jazz=F
            return 'F';

        case 'm': // misc=G
            return 'G';

        case 'n': // newage=H
            return 'H';

        case 'r': // reggae=I, rock=J
            return *(aDirP+1) == 'e' ? 'I' : 'J';

        case 's': // soundtrack=K
            return 'K';

        default: // unknown=L
            return 'L';
    }
}


//----------------------------------------------------------------------------
// convert octal digits to an integer
//
// Leading spaces are skipped, since some archives pad numeric header fields
// with spaces rather than zeros. Conversion stops at the first character
// that is not an octal digit. Returns 0 when there are no digits.
//
// An 11 digit field can exceed the range of an int; rather than overflow
// (undefined for signed arithmetic) the result saturates at INT_MAX.

static int
GetOctal(const char *aDigitP)
{
    int vValue = 0;

    while (*aDigitP == ' ')
        ++aDigitP;

    for (; *aDigitP >= '0' && *aDigitP <= '7'; ++aDigitP)
    {
        int vDigit = *aDigitP - '0';

        // would vValue * 8 + vDigit exceed INT_MAX?
        if (vValue > (INT_MAX - vDigit) / 8)
            return INT_MAX;

        vValue = vValue * 8 + vDigit;
    }
    return vValue;
}


//----------------------------------------------------------------------------
// convert decimal digits to an integer
//
// Conversion stops at the first character that is not a decimal digit.
// Returns 0 when there are no digits.
//
// The digits come from file text of arbitrary length; rather than overflow
// (undefined for signed arithmetic) the result saturates at INT_MAX.

static int
GetDecimal(const char *aDigitP)
{
    int vValue = 0;

    for (; *aDigitP >= '0' && *aDigitP <= '9'; ++aDigitP)
    {
        int vDigit = *aDigitP - '0';

        // would vValue * 10 + vDigit exceed INT_MAX?
        if (vValue > (INT_MAX - vDigit) / 10)
            return INT_MAX;

        vValue = vValue * 10 + vDigit;
    }
    return vValue;
}


//----------------------------------------------------------------------------
// convert hexadecimal digits to an integer
// - only the digits 0-9 and the lower case letters a-f are accepted
// - conversion stops at the first character that is neither
// - the arithmetic is unsigned, so excess digits wrap instead of overflowing

static unsigned
GetHex(aDigitP)
    char *aDigitP;
{
    unsigned vTotal = 0;

    while (TRUE)
    {
        char vChar = *aDigitP;
        unsigned vDigit;

        if ('0' <= vChar && vChar <= '9')
            vDigit = vChar - '0';
        else if ('a' <= vChar && vChar <= 'f')
            vDigit = vChar - 'a' + 10;
        else
            break;

        vTotal = vTotal * 16 + vDigit;
        ++aDigitP;
    }
    return vTotal;
}


//----------------------------------------------------------------------------
// decide whether a string needs cleaning, i.e. whether it
// - starts or ends with a space
// - contains a run of two or more spaces
// - contains the digits of the track number
// - contains an escaped control character (\t, \n or \r)
//
// aTrack below 1 disables the track number test. Numbers of 10 or more are
// matched as two adjacent characters; for 100 and above the first of the two
// is no longer a decimal digit.
// Returns TRUE if any test matches, otherwise FALSE.

static int
DirtyText(aTextP,aTrack)
    char *aTextP;
    int aTrack;
{
    char vFirst = 0, vSecond = 0;
    char *vScan;

    if (aTrack >= 10)
    {
        vFirst = '0' + aTrack / 10;
        vSecond = '0' + aTrack % 10;
    }
    else if (aTrack >= 1)
        vFirst = '0' + aTrack;

    // leading space

    if (*aTextP == ' ')
        return TRUE;

    for (vScan = aTextP; *vScan != 0; ++vScan)
    {
        // vAfter is at worst the terminating NUL
        char vHere = vScan[0], vAfter = vScan[1];

        if (vHere == ' ')
        {
            // doubled or trailing space
            if (vAfter == ' ' || vAfter == 0)
                return TRUE;
        }
        else if (vHere == '\\')
        {
            // escaped tab, newline or return
            if (vAfter == 't' || vAfter == 'n' || vAfter == 'r')
                return TRUE;
        }

        // track number; vFirst stays 0 when the test is disabled and so
        // can never equal a character inside the string
        if (vHere == vFirst && (vSecond == 0 || vAfter == vSecond))
            return TRUE;
    }
    return FALSE;
}


//----------------------------------------------------------------------------
// output disc and track records
// disc records have an empty track field
// track records begin with a digit
//
// One track record is written for each track 0..gTrack, followed by a single
// disc record. Columns are separated by tabs.
//
// track record: track index, entry count, start frame, length in frames,
//               artist, title, extended text, dirty flag
// disc record:  (empty), entry count, disc length, revision, processor,
//               submitter, disc id text, artist, title, year, genre,
//               extended text, category letter, disc id from the file name,
//               file time, file size, track count, track artists flag,
//               dirty flag
//
// The per track strings (frame offsets, titles, extended text) are expected
// to lie one after another in gBuffer, each NUL terminated; the matching
// gFields pointer is stepped past one string per track. Titles containing
// " / " are split in place into artist and title.
//
// NOTE(review): nothing here checks that gBuffer really holds gTrack + 1
// strings for each per track field; a file whose track numbers skip or run
// far ahead makes the pointers step into unrelated text.

static void
OutputDisc(void)
{
    int vTrack, vDirty, vThisFrame = 0, vNextFrame = -1;
    char vCategory = GetCategory(gTar.fName);
    unsigned vDiscId = GetHex(gTar.fName + gCatLength[vCategory - 'A']);
    char *vArtist, *vTitle, *vSep, vVarious = 'f';

    // get the first frame offset

    if (gFields[kFrame])
    {
        vThisFrame = GetDecimal(gFields[kFrame]);
        while (*gFields[kFrame]++);
    }

    for (vTrack = 0; vTrack <= gTrack; ++vTrack)
    {
        // separate artist and title fields

        vArtist = vTitle = "";
        if (gFields[kTTitle])
        {
            for (vSep = gFields[kTTitle]; *vSep; ++vSep)
                if (*vSep == ' ' && *(vSep + 1) == '/' && *(vSep + 2) == ' ')
                    break;
            if (*vSep)
            {
                vVarious = 't'; // note the presence of track artists
                vArtist = gFields[kTTitle];
                gFields[kTTitle] = vTitle = vSep + 3;
                *vSep = 0;
            }
            else
                vTitle = gFields[kTTitle];
        }

        // get the next frame offset
        // the last track ends at the disc length, given in seconds

        if (gFields[kFrame] && vTrack < gTrack)
        {
                vNextFrame = GetDecimal(gFields[kFrame]);
                while (*gFields[kFrame]++);
        }
        else
        {
            gFields[kFrame] = NULL;
            if (gFields[kLength])
            {
                // clamp instead of overflowing on an absurd disc length
                int vSeconds = GetDecimal(gFields[kLength]);
                if (vSeconds > INT_MAX / kFramesPerSecond)
                    vNextFrame = INT_MAX;
                else
                    vNextFrame = vSeconds * kFramesPerSecond;
            }
            else
                vNextFrame = 0;
        }

        // check whether fields will need to be cleaned

        vDirty = DirtyText(vArtist,vTrack+1) || DirtyText(vTitle,vTrack+1);

        // output a track record

        printf("%d\t%d\t%d\t%d\t%s\t%s\t%s\t%c\n",
               vTrack,
               gCount,
               vThisFrame,
               vNextFrame - vThisFrame,
               vArtist,
               vTitle,
               gFields[kExtT] ? gFields[kExtT] : "",
               vDirty? 't' : 'f');

        // step past this track's title and extended text

        if (gFields[kTTitle]) while (*gFields[kTTitle]++);
        if (gFields[kExtT]) while (*gFields[kExtT]++);

        vThisFrame = vNextFrame;
    }

    // separate artist and title fields in disc records

    vArtist = vTitle = "";
    if (gFields[kDTitle])
    {
        for (vSep = gFields[kDTitle]; *vSep; ++vSep)
            if (*vSep == ' ' && *(vSep + 1) == '/' && *(vSep + 2) == ' ')
                break;
        if (*vSep)
        {
            vArtist = gFields[kDTitle];
            vTitle = vSep + 3;
            *vSep = 0;
        }
        else
            vTitle = gFields[kDTitle];
    }

    // check whether fields will need to be cleaned

    vDirty = DirtyText(vArtist,0) || DirtyText(vTitle,0);

    // output a disc record
    // the disc id is printed as a signed number, as before; the cast makes
    // the argument agree with the %d conversion

    printf("\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
           "\t%s\t%s\t%c\t%d\t%d\t%d\t%d\t%c\t%c\n",
           gCount,
           gFields[kLength] ? gFields[kLength] : "",
           gFields[kRevision] ? gFields[kRevision] : "0",
           gFields[kProcessor] ? gFields[kProcessor] : "",
           gFields[kSubmitter] ? gFields[kSubmitter] : "",
           gFields[kDiscId] ? gFields[kDiscId] : "",
           vArtist,
           vTitle,
           gFields[kDYear] ? gFields[kDYear] : "",
           gFields[kDGenre] ? gFields[kDGenre] : "",
           gFields[kExtD] ? gFields[kExtD] : "",
           vCategory,
           (int)vDiscId,
           gFileTime,
           gFileSize,
           gTrack + 1,
           vVarious,
           vDirty ? 't' : 'f');
}


//----------------------------------------------------------------------------
// output a link record
// link records begin with a letter
//
// columns: category letter and disc id of the entry, file time, category
// letter and disc id of the link target. Both ids are taken from the hex
// digits that follow the category directory in the respective path.
//
// The ids are printed as signed numbers, as before; the casts make the
// arguments agree with the %d conversions.

static void
OutputLink(void)
{
    char vFileCat = GetCategory(gTar.fName);
    char vLinkCat = GetCategory(gTar.fLinkName);
    unsigned vFileId = GetHex(gTar.fName + gCatLength[vFileCat - 'A']);
    unsigned vLinkId = GetHex(gTar.fLinkName + gCatLength[vLinkCat - 'A']);

    printf("%c\t%d\t%d\t%c\t%d\n",
           vFileCat,
           (int)vFileId,
           gFileTime,
           vLinkCat,
           (int)vLinkId);
}


//----------------------------------------------------------------------------
// handle an error

static void
HandleError(aError)
    enum tErrors aError;
{
    if (aError)
    {
        char *vThis;
        for (vThis = gBuffer; vThis < gNext; ++vThis)
            putchar(*vThis);
    }
    exit(aError);
}


//----------------------------------------------------------------------------
// retrieve the next character
// - reads one character from gFrom and advances the read position
// - a NUL marks the end of the file contents and is always an error
// - a newline is an error unless aTolerance is at least kEolOk
// - otherwise the character is stored at gNext as the pending character,
//   without advancing gNext, and returned
// HandleError() ends the program, so the error paths do not return

static int
GetNext(aTolerance)
    enum tTolerance aTolerance;
{
    int vChar = *gFrom;

    ++gFrom;

    if (vChar == 0)
        HandleError(kFileError);
    else if (vChar == '\n' && aTolerance < kEolOk)
        HandleError(kLineError);

    *gNext = vChar;
    return vChar;
}


//----------------------------------------------------------------------------
// commit a character, escaping special characters
//
// The pending character at gNext (stored there by GetNext) is made part of
// the collected text and gNext is advanced past it:
// - a newline is replaced by a NUL, terminating the current string
// - a tab or return is replaced by the two characters \t or \r
// - a backslash is held over: if the next character is t, r, n or another
//   backslash the pair is kept as it stands, otherwise the backslash is
//   doubled
//
// If the buffer is nearly full the disc is output with what has been
// collected and the program ends with kBufferError.
//
// A single call can advance gNext by three (a doubled backslash followed by
// an escaped tab or return) and GetNext then stores the following character
// at gNext, so three spare bytes are required. The margin used to be two,
// which let that store land one byte past the end of gBuffer.

static void
PutNext(void)
{
    static int vSlashF = FALSE;

    if (gNext >= gBuffer + kMaxBuffer - 3)
    {
        OutputDisc();
        HandleError(kBufferError);
    }

    // put out a pending back slash

    if (vSlashF)
    {
        vSlashF = FALSE;
        switch (*gNext)
        {
            case 't':
            case 'r':
            case 'n':
            case '\\':
                // already a valid escape sequence
                ++gNext;
                return;

            default:
                // lone back slash: double it and shift the character along
                *(gNext+1) = *gNext;
                *gNext++ = '\\';
                break;
        }
    }

    // escape tabs, returns and backslashes
    // null terminate lines

    switch (*gNext)
    {
        case '\n':
            *gNext++ = 0;
            break;

        case '\t':
            *gNext++ = '\\';
            *gNext++ = 't';
            break;

        case '\r':
            *gNext++ = '\\';
            *gNext++ = 'r';
            break;

        case '\\':
            vSlashF = TRUE;
            // fall through: the back slash itself is committed now

        default:
            ++gNext;
            break;
    }
}


//----------------------------------------------------------------------------
// save the rest of a line, escaping special characters
// the pending character is committed first, then every character up to and
// including the newline, which PutNext turns into a terminating NUL

static void
CopyLine(void)
{
    int vChar;

    do
    {
        PutNext();
        vChar = GetNext(kEolOk);
    }
    while (vChar != '\n');

    PutNext();
}


//----------------------------------------------------------------------------
// discard a line
// characters are read up to and including the next newline; nothing is
// committed to the output buffer

static void
SkipLine(void)
{
    for (;;)
        if (GetNext(kEolOk) == '\n')
            return;
}


//----------------------------------------------------------------------------
// save a field
// an empty field (the pending character is already the newline) is just
// terminated; anything else is copied to the end of the line

static void
CopyField(void)
{
    if (*gNext != '\n')
    {
        CopyLine();
        return;
    }
    PutNext();
}


//----------------------------------------------------------------------------
// skip a disc field name and save the rest of the line
// - reads up to the '=' (a newline before it is an error)
// - a repeat of the field just saved continues it: gNext is stepped back
//   over the terminating NUL so the new text is appended
// - otherwise the field starts here
// - the value is then saved with CopyField

static void
SkipDisc(aField)
    enum tFields aField;
{
    int vChar;

    do vChar = GetNext(kNeeded);
    while (vChar != '=');

    if (aField != gField)
    {
        gField = aField;
        gFields[aField] = gNext;
    }
    else
        gNext--;

    GetNext(kEolOk);
    CopyField();
}


//----------------------------------------------------------------------------
// skip a track field and output the rest of the line

static void
SkipTrack(aField)
    enum tFields aField;
{
    int vTrack = 0;

    while (GetNext(kNeeded), *gNext < '0' || *gNext > '9');

    do vTrack = vTrack * 10 + (*gNext - '0');
    while (GetNext(kNeeded), *gNext >= '0' && *gNext <= '9');

    if (*gNext != '=')
        HandleError(kFieldError);

    if (aField == gField && vTrack == gTrack)
        --gNext;
    else
    {
        if (aField != gField)
        {
            gFields[aField] = gNext;
            gField = aField;
        }
        gTrack = vTrack;
    }

    GetNext(kEolOk);
    CopyField();
}


//----------------------------------------------------------------------------
// skip past a colon and the white space after it
// - returns FALSE if the line ends before a colon is found
// - otherwise skips spaces and tabs following the colon, records the
//   current position as the start of aField and returns TRUE
// on return the pending character is the first one of the value

static int
SkipColon(aField)
    enum tFields aField;
{
    for (;;)
    {
        GetNext(kEolOk);
        if (*gNext == '\n')
            return FALSE;
        if (*gNext == ':')
            break;
    }

    do GetNext(kEolOk);
    while (*gNext == ' ' || *gNext == '\t');

    gField = aField;
    gFields[aField] = gNext;
    return TRUE;
}


//----------------------------------------------------------------------------
// save only a number
// the pending character is committed whatever it is, followed by any
// decimal digits after it; the remainder of the line is discarded and the
// string is terminated

static void
CopyNumber(void)
{
    PutNext();

    for (;;)
    {
        GetNext(kEolOk);
        if (*gNext < '0' || *gNext > '9')
            break;
        PutNext();
    }

    // drop whatever follows the number

    while (*gNext != '\n')
        GetNext(kEolOk);

    PutNext();
}


//----------------------------------------------------------------------------
// skip blanks and analyse a comment line
// the first character after the blanks selects the treatment:
//   D  "# Disc length:"    number saved as kLength
//   R  "# Revision:"       number saved as kRevision
//   P  "# Processed by:"   text saved as kProcessor
//   S  "# Submitted via:"  text saved as kSubmitter
//   digit                  frame offset, saved as one of the kFrame strings
//   newline                empty comment, nothing to do
//   anything else          the line is discarded
// only that one character is tested; a header without a colon is ignored

static void
SkipComment(void)
{
    char vLead;

    do GetNext(kEolOk);
    while (*gNext == ' ' || *gNext == '\t');

    vLead = *gNext;

    if (vLead == '\n')
        return;

    if (vLead == 'D' || vLead == 'R')
    {
        // numeric headers
        if (SkipColon(vLead == 'D' ? kLength : kRevision))
            CopyNumber();
    }
    else if (vLead == 'P' || vLead == 'S')
    {
        // text headers
        if (SkipColon(vLead == 'P' ? kProcessor : kSubmitter))
            CopyField();
    }
    else if (vLead >= '0' && vLead <= '9')
    {
        // frame offsets follow one another; only the first marks the start
        if (gField != kFrame)
        {
            gField = kFrame;
            gFields[kFrame] = gNext;
        }
        CopyNumber();
    }
    else
        SkipLine();
}


//----------------------------------------------------------------------------
// process a cddb file
//
// Each line is dispatched on its first character (and, for D and E lines,
// on a later one) until a line starting with P (PLAYORDER=) is reached, at
// which point the collected disc is output and the function returns.
// Unrecognised D or E fields end the program with kFieldError; other
// unrecognised lines are skipped.
//
// An empty line is now ignored. It used to reach the default case, where
// SkipLine() then threw away the line that followed it.

static void
CopyFile(void)
{
    while (TRUE)
        switch (GetNext(kEofOk))
        {
            case '#':
                SkipComment();
                break;

            case 'D':
                switch (GetNext(kNeeded))
                {
                    case 'I': //DISCID=
                        SkipDisc(kDiscId);
                        break;

                    case 'T': //DTITLE=
                        SkipDisc(kDTitle);
                        break;

                    case 'Y': //DYEAR=
                        SkipDisc(kDYear);
                        break;

                    case 'G': //DGENRE=
                        SkipDisc(kDGenre);
                        break;

                    default:
                        HandleError(kFieldError);
                }
                break;

            case 'T': //TTITLE[0-9]+=
                SkipTrack(kTTitle);
                break;

            case 'E':
                // the fourth character tells EXTD from EXTT
                GetNext(kNeeded);
                GetNext(kNeeded);
                switch (GetNext(kNeeded))
                {
                    case 'D': //EXTD=
                        SkipDisc(kExtD);
                        break;

                    case 'T': //EXTT[0-9]+=
                        SkipTrack(kExtT);
                        break;

                    default:
                        HandleError(kFieldError);
                }
                break;

            case 'P': //PLAYORDER=
                OutputDisc();
                return;

            case '\n': // empty line: the whole line has been consumed
                break;

            default:
                SkipLine();
                break;
        }
}


//----------------------------------------------------------------------------
// read bytes into a buffer

static void
GetBlocks(aTolerance,aBufferP,aBlocks)
    enum tTolerance aTolerance;
    char *aBufferP;
    size_t aBlocks;
{
    if (fread(aBufferP,kBlockSize,aBlocks,stdin) < aBlocks)
        HandleError(aTolerance < kEofOk || ferror(stdin) ? kLengthError : 0);
}


//----------------------------------------------------------------------------
// read a file header and contents from a tar archive on stdin

static void
GetFile(void)
{
    Initialise();
    GetBlocks(kEofOk,&gTar,1);
    gFileSize = GetOctal(gTar.fSize);
    gFileTime = GetOctal(gTar.fTime);
    switch (gTar.fTypeFlag)
    {
        case kARegType:
        case kRegType:
        case kContType:
            if (gFileSize >= kMaxBuffer)
                HandleError(kBufferError);
            if (gFileSize)
            {
                size_t vBlocks = (gFileSize + kBlockSize - 1) / kBlockSize;
                GetBlocks(kNeeded,gInput,vBlocks);
                gInput[gFileSize] = 0;
                if (GetNext(kEofOk) == '#' &&
                    GetNext(kEofOk) == ' ' &&
                    GetNext(kEofOk) == 'x' &&
                    GetNext(kEofOk) == 'm' &&
                    GetNext(kEofOk) == 'c' &&
                    GetNext(kEofOk) == 'd')
                {
                    SkipLine();
                    CopyFile();
                }
            }
            break;

        case kLnkType:
        case kSymType:
            OutputLink();
            break;

        case kDirType:
            break;

        case kChrType:
        case kBlkType:
        case kFifoType:
        default:
            fprintf(stderr,"bad archive type %s %x\n",gTar.fName,gTar.fTypeFlag);
            HandleError(kArchiveError);
            break;
    }
}


//----------------------------------------------------------------------------
// process archive entries from stdin one after another
// the loop has no exit of its own: the program ends inside GetFile, either
// normally when the input runs out or through an error

extern int
main(void)
{
    for (;;)
        GetFile();
}


//============================================================================
