.abi / .ab1 reader

Hi all,

Does anyone know of an ABI file format (.abi / .ab1) reader for Igor, i.e. one that reads the sequencing data plus the electropherograms?

Thanks!

Tom
No, but I did once write a reader for sequencing chromatograms in the SCF format. This was a long time ago and I've forgotten pretty much everything about it. I'm including the code below in case it can help you out.

Some quick searching revealed that the ABI format specification can be found online. I think you could use it to write your own reader in Igor, though how feasible that is depends on your familiarity with programming.

#pragma rtGlobals=1     // Use modern global access method.
 
 
// staden.sourceforge.net/scf-rfc.html
 
 
// Fixed-size header found at the start of an SCF file. ReadSCFHeader reads
// the whole structure from file position 0 in one FBinRead call.
// Field descriptions follow the SCF specification referenced above.
Structure SCFHeader
    uint32 magic_number     // File signature; ReadSCFHeader reads these four bytes as a string and compares them with ".scf"
    uint32 samples              // Number of elements in Samples matrix
    uint32 samples_offset       // Byte offset from start of file
    uint32 bases                // Number of bases in Bases matrix
    uint32 bases_left_clip      // OBSOLETE: No. bases in left clip (vector)
    uint32 bases_right_clip // OBSOLETE: No. bases in right clip (qual)
    uint32 bases_offset         // Byte offset from start of file
    uint32 comments_size        // Number of bytes in Comment section
    uint32 comments_offset      // Byte offset from start of file
    char version[4]             // "version.revision", eg '3' '.' '0' '0'
    uint32 sample_size          // Size of one sample; ReadSCFHeader aborts unless this is 2
    uint32 code_set             // Not used by the code in this file -- presumably the base code set; confirm against the SCF spec
    uint32 private_size         // Not used by the code in this file -- presumably the size of the private data section; confirm
    uint32 private_offset       // Not used by the code in this file -- presumably the byte offset of the private data; confirm
    uint32 spare[18]            // Not used by the code in this file; pads the structure to 128 bytes
EndStructure
 
// One base-call record (12 bytes). ReadSCFHeader fills one of these per base
// and passes it to GetMostLikelyBase.
Structure SequenceEntry
    uint32 peak_index   // Stored in BasePositions by ReadSCFHeader; presumably the sample index of the peak for this base
    uchar prob_A        // Score for the base being A (compared in GetMostLikelyBase)
    uchar prob_C        // Score for the base being C (compared in GetMostLikelyBase)
    uchar prob_G        // Score for the base being G (compared in GetMostLikelyBase)
    uchar prob_T        // Score for the base being T (compared in GetMostLikelyBase)
    char   base         // Presumably the called base character stored in the file -- confirm against the SCF spec
    uchar prob_sub      // Not read by the code in this file -- presumably a substitution score; confirm
    uchar prob_ins      // Not read by the code in this file -- presumably an insertion score; confirm
    uchar prob_del      // Not read by the code in this file -- presumably a deletion score; confirm
EndStructure
 
// Prompts for an SCF chromatogram file, loads its traces and base calls into
// waves in the current data folder, and shows the traces in a graph.
//
// Output waves (overwritten on every call):
//   ChromatogramA/C/G/T  16-bit integer traces, one point per sample
//   BaseAssignments      text wave, one base letter per called base
//   BasePositions        peak_index of every called base
//
// Returns 0 if the user cancels the file dialog. Aborts (after closing the
// file) if the file does not carry the SCF magic number or if its sample size
// is not 2.
//
// The on-disk layout depends on the file version (see the SCF specification):
//   version >= 3: four consecutive trace arrays (A, C, G, T), each stored as
//                 second-order differences, and column-wise base data
//                 (all peak indices, all A scores, all C scores, ...).
//   version <  3: interleaved A,C,G,T samples holding absolute values, and
//                 one 12-byte SequenceEntry record per base.
Function ReadSCFHeader()
    
    variable refNum, i
    
    // /D only shows the dialog; the chosen path comes back in S_fileName.
    Open /D/R /F="Chromatograms:.scf" refNum
    if (strlen(S_fileName) == 0)
        return 0
    endif
    
    Open /R refNum as S_fileName
    
    Struct SCFHeader header
    
    // FBinRead into a string reads as many bytes as the string already holds,
    // so the placeholder fixes the read length at four bytes.
    string magicString = "1234"
    variable byteOrder  // value for FBinRead /B: 2 = big-endian, 3 = little-endian
    
    FBinRead refNum, magicString
    if (StringMatch(magicString, ".scf") == 1)
        // the format is big endian
        byteOrder = 2
    elseif (StringMatch(magicString, "fcs.") == 1)
        // byte-swapped magic number: the file was written little-endian
        byteOrder = 3
    else
        // Without the magic number the header fields are meaningless and
        // must not be used to size waves or seek in the file.
        Close refNum
        Abort "The selected file is not an SCF file"
    endif
    
    FSetPos refNum, 0
    FBinRead /B=(byteOrder) refNum, header
    
    if (header.sample_size != 2)
        Close refNum
        Abort "The sample size is not 2"
    endif
    
    // The first character of "version.revision" is the major version digit.
    variable isVersion3 = (header.version[0] >= char2num("3"))
    
    Make /W/O/N=(header.samples) ChromatogramA, ChromatogramC, ChromatogramG, ChromatogramT
    
    FSetPos refNum, header.samples_offset
    
    // read the raw chromatogram data
    if (isVersion3)
        FBinRead /B=(byteOrder) refNum, ChromatogramA
        FBinRead /B=(byteOrder) refNum, ChromatogramC
        FBinRead /B=(byteOrder) refNum, ChromatogramG
        FBinRead /B=(byteOrder) refNum, ChromatogramT
        
        // version 3 stores differences, not absolute values
        ConvertSamplesToAbsoluteValues(ChromatogramA)
        ConvertSamplesToAbsoluteValues(ChromatogramC)
        ConvertSamplesToAbsoluteValues(ChromatogramG)
        ConvertSamplesToAbsoluteValues(ChromatogramT)
    elseif (header.samples > 0)
        // Older versions interleave the traces: A0 C0 G0 T0 A1 C1 G1 T1 ...
        // Reading into a 4-row matrix puts each trace in its own row.
        Make /W/O/N=(4, header.samples) SCFTempSamples
        FBinRead /B=(byteOrder) refNum, SCFTempSamples
        ChromatogramA = SCFTempSamples[0][p]
        ChromatogramC = SCFTempSamples[1][p]
        ChromatogramG = SCFTempSamples[2][p]
        ChromatogramT = SCFTempSamples[3][p]
        KillWaves /Z SCFTempSamples
    endif
    
    // read the base assignments
    Make /T/O/N=(header.bases) BaseAssignments
    Make /O/N=(header.bases) BasePositions
    
    Struct SequenceEntry sequence
    
    FSetPos refNum, header.bases_offset
    
    if (isVersion3)
        // Column-wise layout: every field is stored as its own array.
        Make /I/U/O/N=(header.bases) SCFTempPeakIndex
        Make /B/U/O/N=(header.bases) SCFTempProbA, SCFTempProbC, SCFTempProbG, SCFTempProbT, SCFTempBase
        
        FBinRead /B=(byteOrder) refNum, SCFTempPeakIndex
        FBinRead refNum, SCFTempProbA
        FBinRead refNum, SCFTempProbC
        FBinRead refNum, SCFTempProbG
        FBinRead refNum, SCFTempProbT
        FBinRead refNum, SCFTempBase
        
        for (i = 0; i < header.bases; i+=1)
            // Rebuild one record per base so GetMostLikelyBase can be reused.
            sequence.peak_index = SCFTempPeakIndex[i]
            sequence.prob_A = SCFTempProbA[i]
            sequence.prob_C = SCFTempProbC[i]
            sequence.prob_G = SCFTempProbG[i]
            sequence.prob_T = SCFTempProbT[i]
            sequence.base = SCFTempBase[i]
            BaseAssignments[i] = GetMostLikelyBase(sequence)
            BasePositions[i] = sequence.peak_index
        endfor
        
        KillWaves /Z SCFTempPeakIndex, SCFTempProbA, SCFTempProbC, SCFTempProbG, SCFTempProbT, SCFTempBase
    else
        // Record-wise layout: one SequenceEntry per base.
        for (i = 0; i < header.bases; i+=1)
            FBinRead /B=(byteOrder) refNum, sequence
            BaseAssignments[i] = GetMostLikelyBase(sequence)
            BasePositions[i] = sequence.peak_index
        endfor
    endif
    
    Close refNum
    
    // Display the chromatogram. An existing viewer already shows the
    // overwritten waves, so it is only brought to the front.
    DoWindow /F ChromatogramViewer
    if (V_flag != 1)
        Display /N=ChromatogramViewer as "Chromatogram"
        AppendToGraph ChromatogramA, ChromatogramC, ChromatogramT, ChromatogramG
        ModifyGraph rgb(ChromatogramA)=(0,65535,0),rgb(ChromatogramC)=(0,0,65535);DelayUpdate
        ModifyGraph rgb(ChromatogramG)=(0,0,0)
    endif
    
End
 
// Replaces the contents of samples, in place, with its running sum taken
// twice: the first pass turns each point into the cumulative sum of the
// points up to it, and the second pass repeats that on the result.
//
// samples: wave to convert; only dimension 0 is traversed.
Function ConvertSamplesToAbsoluteValues(samples)
    wave samples
    
    variable pointCount = DimSize(samples, 0)
    variable pass, idx, previous
    
    for (pass = 0; pass < 2; pass += 1)
        previous = 0    // every pass starts accumulating from zero
        for (idx = 0; idx < pointCount; idx += 1)
            samples[idx] = samples[idx] + previous
            // read the value back from the wave so the running value is
            // exactly what the wave stores
            previous = samples[idx]
        endfor
    endfor
End
    
// Returns the base letter for one base-call record.
//
// sequence: record whose prob_A/prob_C/prob_G/prob_T scores are compared.
//
// If exactly one score is the highest, the matching letter ("A", "T", "C" or
// "G") is returned. If several scores tie for the highest (for example all
// four are zero), the scores cannot decide, so the character stored in the
// record's base field is returned instead when it is set. Only when that
// field is empty does the function fall back to the fixed order A, T, C, G.
Function /S GetMostLikelyBase(sequence)
    Struct SequenceEntry &sequence
    
    variable probA, probC, probT, probG
    
    probA = sequence.prob_A
    probT = sequence.prob_T
    probC = sequence.prob_C
    probG = sequence.prob_G
    
    variable highestProbability = max(max(probA, probT), max(probC, probG))
    
    // Each comparison evaluates to 0 or 1, so the sum counts the bases that
    // share the highest score.
    variable nHighest = (probA == highestProbability) + (probT == highestProbability)
    nHighest += (probC == highestProbability) + (probG == highestProbability)
    
    if (nHighest > 1 && sequence.base > 0)
        return num2char(sequence.base)
    endif
    
    if (highestProbability == probA)
        return "A"
    elseif (highestProbability == probT)
        return "T"
    elseif (highestProbability == probC)
        return "C"
    endif
    
    // The maximum always equals one of the four scores, so only G is left.
    return "G"
End