Browse Source

Add documentation

master
Nick Rozinsky 10 months ago
parent
commit
dfd95c5470
1 changed files with 241 additions and 0 deletions
  1. +241
    -0
      Documentation.md

+ 241
- 0
Documentation.md View File

@@ -148,3 +148,244 @@ int totalRows;
Holds number of written records.

---
```d
// Convenience constructor: uses DEFAULT_SIZE as the rowgroup size.
// Forwards the `bamHead` parameter (not the still-unassigned `bamHeader`
// member) so the caller's header is the one that gets stored.
this(File fn, BamHeader bamHead)
{
this(fn, bamHead, DEFAULT_SIZE);
}

// Main constructor.
//   fn        - output file; CBAM_MAGIC is written to it immediately
//   bamHead   - BAM header stored in the `bamHeader` member
//   groupSize - number of records buffered per rowgroup
this(File fn, BamHeader bamHead, int groupSize)
{
rowGroupSize = groupSize;
// The record buffer holds exactly one rowgroup worth of records.
buffer.length = rowGroupSize;
file = fn;
file.rawWrite(CBAM_MAGIC);
bamHeader = bamHead;
numRows = 0;
totalRows = 0;
}
```
Constructors.

---

```d
// Destructor: delegates to close().
~this()
{
close();
}

// Finishes writing; may be called manually as well as from the destructor.
// Returns immediately when the file is not open.
void close()
{
if (!file.isOpen)
return;
// Flush the records still held in the buffer, call writeMeta(),
// then close the file.
flushBuf();
writeMeta();
file.close();
}
```

Destructor and *close* function. The destructor calls *close*, which may also be called manually.

---

```d
// Stores `record` in the next free slot of the record buffer.
// If the buffer already holds rowGroupSize records, it is flushed with
// flushBuf() and the fill counter is reset before the record is stored.
void addRecord(RawReadBlob record)
{
if (numRows == rowGroupSize)
{
flushBuf();
numRows = 0;
}
buffer[numRows++] = record;
}
```

Adds a record to the CBAM file. When the buffer is full, it is flushed to the file automatically before the new record is stored.

---

```d
// Passes the first numRows records of the buffer to writeRowGroup().
// numRows is not reset here; addRecord() resets it after calling this.
void flushBuf()
{
writeRowGroup(buffer[0..numRows], numRows);
}
```

Calls *writeRowGroup* to write only valid records from buffer to file.

---

```d
void writeRowGroup(RawReadBlob[] recordBuf, uint num_rows){...}
```

The following code chunks are from this function.

Manages writing a formed rowgroup to the file: fills the byte buffer and writes it to the file.

```d
RowGroupMeta rowGroupMeta;
rowGroupMeta.num_rows = num_rows;
totalRows += numRows;

uint total_size = 0;
```

Prepares the rowgroup meta object and the related counters.

```d
ubyte[] buf;
buf.length = num_rows * int.sizeof;
```

Byte buffer. Holds the bytes extracted from BAM read fields before they are written. It is initialized to the length required to hold one fixed-size field value for every record in the rowgroup. Every fixed-size BAM record field is 4 bytes long, hence the buffer can be preallocated.

Note that, to avoid reallocation, the byte buffer is filled first with the fixed-size fields, since they all occupy the same amount of space, and only then with the variable-size fields.

```d
foreach (columnType; EnumMembers!ColumnTypes){...}
```

A foreach loop that manages filling the byte buffer. It iterates over the previously defined *ColumnTypes* enum. The enum values are ordered so that the byte buffer is not reallocated until the variable-size fields are reached: the first nine values represent fixed-size fields, and they are processed first.

```d
foreach (columnType; EnumMembers!ColumnTypes)
{
if (columnType < ColumnTypes.read_name)
{
rowGroupMeta.columnsOffsets[columnType] = file.tell(); // line 1
for (int i = 0; i < num_rows; ++i)
{
writeFieldToBuf(buf, columnType, recordBuf[i], i * simple_field_size);
}
rowGroupMeta.uncompressedColSizes[columnType] = buf.length;
rowGroupMeta.columnsSizes[columnType] = writeColumn(
buf[0 .. num_rows * simple_field_size]);
}
else
{...}
}
```

All enum values before *ColumnTypes.read_name* represent fixed-size fields.

```d
rowGroupMeta.columnsOffsets[columnType] = file.tell();
```

Saves the position in the file where the column chunk begins. Note that there have been reports that *file.tell()* may return wrong values.

```d
for (int i = 0; i < num_rows; ++i)
{
writeFieldToBuf(buf, columnType, recordBuf[i], i * simple_field_size);
}
```

Extracts the field corresponding to the specified column from each record and writes it to the byte buffer at the given offset.

```d
rowGroupMeta.uncompressedColSizes[columnType] = buf.length;
rowGroupMeta.columnsSizes[columnType] = writeColumn(
buf[0 .. num_rows * simple_field_size]);
```

Saves uncompressed and compressed column chunk sizes.

```d
foreach (columnType; EnumMembers!ColumnTypes)
{
if (columnType < ColumnTypes.read_name)
{...}
else
{
buf.length = calcBufSize(columnType, recordBuf) + int.sizeof * num_rows;
rowGroupMeta.columnsOffsets[columnType] = file.tell();

int currentPos = 0;
for (int i = 0; i < num_rows; ++i)
{
writeVarFieldToBuf(buf, columnType, recordBuf[i], currentPos);
}
rowGroupMeta.uncompressedColSizes[columnType] = buf.length;
rowGroupMeta.columnsSizes[columnType] = writeColumn(buf[0 .. currentPos]);
}
}
```

Writes the variable-size fields. Calculates the byte buffer length needed to hold the data. Unlike the fixed-size part, it uses *currentPos* to track the offset in the byte buffer, since the fields are variable-size and the offset cannot be computed directly.

```d
rowGroupMeta.total_byte_size = reduce!((a, b) => a + b)(rowGroupMeta.columnsSizes);
fileMeta.rowGroups ~= rowGroupMeta;
```

Calculates the total byte size of the rowgroup and appends the rowgroup meta to the file's array of rowgroup metas.

---

```d
void writeFieldToBuf(ubyte[] buf, ColumnTypes columnType, RawReadBlob readBlob, int offset)
```

Extracts the field's bytes and saves them to the byte buffer.

```d
switch (columnType)
{
case ColumnTypes._refID:
{
std.bitmanip.write!(int, Endian.littleEndian, ubyte[])(buf,
readBlob.refid, offset);
break;
}
case ColumnTypes._pos:
{
std.bitmanip.write!(int, Endian.littleEndian, ubyte[])(buf, readBlob.pos, offset);
break;
}
case ColumnTypes._blob_size:
{
uint blob_size = cast(int) readBlob._data.length;
std.bitmanip.write(buf, blob_size, offset);
break;
}
case ColumnTypes._bin_mq_nl:
{
buf[offset .. offset + simple_field_size] = readBlob.raw_bin_mq_nl;
break;
}
case ColumnTypes.sequence_length:
{
buf[offset .. offset + simple_field_size] = readBlob.raw_sequence_length;
break;
}
case ColumnTypes._flag_nc:
{
buf[offset .. offset + simple_field_size] = readBlob.raw_flag_nc;
break;
}
case ColumnTypes._next_pos:
{
buf[offset .. offset + simple_field_size] = readBlob.raw_next_pos;
break;
}
case ColumnTypes._next_refID:
{
buf[offset .. offset + simple_field_size] = readBlob.raw_next_refID;
break;
}
case ColumnTypes._tlen:
{
buf[offset .. offset + simple_field_size] = readBlob.raw_tlen;
break;
}
default:
{
assert(false, "No such type exists");
}
}
```



Loading…
Cancel
Save