www.digitalmars.com         C & C++   DMDScript  

D.gnu - Bug with align(1) and ulong

reply "Jean-Baptiste Boric" <jblbeurope gmail.com> writes:
Hi there,

I'm a C/C++ programmer who decided to try D for a small 
side-project (it boils down to extracting information from an 
NTFS partition).

After exhausting all other explanations, I think I found a bug 
in gdc : when I use ulong fields in structs with align(1) they 
are not properly aligned ; even worse, they "bleed" three bytes 
onto the next field.

Here's the minimal test code :

---
import std.stdio;
import std.stream;
import std.conv;

//
// On-disk data structures
//

// NTFS BIOS parameter block (variant using ulong for the 64-bit fields).
// Every field is declared under align(1), so the intended layout is the
// fields back to back with no padding: 2+1+7+1+18+8+8+8+4+4+8+4 = 73 bytes.
// This is the struct whose ulong fields are reported as misread with
// gdc 4.6.3 in the surrounding post.
struct NTFS_BootSector_BPB {
	align (1) {
		ushort		bytesPerSector;
		ubyte		sectorsPerCluster;
		ubyte[7]	_unused1;
		ubyte		mediaDescriptor;
		ubyte[18]	_unused2;
		ulong		totalSectors;			// 64-bit field; ubyte[8] in NTFS_BootSector_BPB2
		ulong		logicalClusterNumberMFT;	// 64-bit field; ubyte[8] in NTFS_BootSector_BPB2
		ulong		logicalClusterNumberMFTmir;	// 64-bit field; ubyte[8] in NTFS_BootSector_BPB2
		uint		clustersPerMFTRecord;
		uint		clustersPerIndexBuffer;
		ulong		volumeSerialNumber;		// 64-bit field; ubyte[8] in NTFS_BootSector_BPB2
		ubyte[4]	_unused3;
	}
}

// NTFS boot sector wrapping the ulong-based NTFS_BootSector_BPB.
// With align(1) honoured the intended size is 3+8+73+426+2 = 512 bytes,
// which matches the 512 bytes main() reads into it.
struct NTFS_BootSector {
	align (1) {
		ubyte[3]		_unused1;
		ubyte[8]		oemID;
		NTFS_BootSector_BPB 	bpb;
		ubyte[426] 		_unused2;
		ushort	 		signature;
	}
}

// NTFS BIOS parameter block 2: same field names, order and sizes as
// NTFS_BootSector_BPB, except that the four 64-bit fields are declared as
// ubyte[8] arrays instead of ulong. It serves as the comparison layout
// in this test case.
struct NTFS_BootSector_BPB2 {
	align (1) {
		ushort		bytesPerSector;
		ubyte		sectorsPerCluster;
		ubyte[7]	_unused1;
		ubyte		mediaDescriptor;
		ubyte[18]	_unused2;
		ubyte[8]	totalSectors;			// ulong in NTFS_BootSector_BPB
		ubyte[8]	logicalClusterNumberMFT;	// ulong in NTFS_BootSector_BPB
		ubyte[8]	logicalClusterNumberMFTmir;	// ulong in NTFS_BootSector_BPB
		uint		clustersPerMFTRecord;
		uint		clustersPerIndexBuffer;
		ubyte[8]	volumeSerialNumber;		// ulong in NTFS_BootSector_BPB
		ubyte[4]	_unused3;
	}
}

// NTFS boot sector wrapping the byte-array-based NTFS_BootSector_BPB2.
// Field-for-field identical to NTFS_BootSector apart from the type of bpb.
struct NTFS_BootSector2 {
	align (1) {
		ubyte[3]		_unused1;
		ubyte[8]		oemID;
		NTFS_BootSector_BPB2 	bpb;
		ubyte[426] 		_unused2;
		ushort	 		signature;
	}
}

// Test driver: reads the first 512 bytes of the file named by args[1] twice,
// once into a NTFS_BootSector and once into a NTFS_BootSector2, so the two
// struct variants can be compared (e.g. in a debugger).
// Returns -1 after printing a usage message when args.length is not 2,
// and 0 otherwise. The values read are not printed or checked here.
int main(char[][] args) {
	if(args.length != 2) {
     		writeln("Usage: bug INPUT_FILE");
     		return -1;
     	}
     	
	Stream			stream;
	NTFS_BootSector		bootSector;
	NTFS_BootSector2	bootSector2;
     	
     	stream = new BufferedFile(to!string(args[1]));
     	
	// Read boot sector
	stream.seek(0, SeekPos.Set);
	// 512 is hard-coded rather than taken from the struct's size
	stream.readExact(cast(ubyte*)&bootSector, 512);
	
	// Read boot sector again
	stream.seek(0, SeekPos.Set);
	// Same 512 bytes, this time into the ubyte[8]-based variant
	stream.readExact(cast(ubyte*)&bootSector2, 512);
	
	return 0;
}
---

And here are the results viewed with gdb (boot code cut for 
brevity) :
---
(gdb) print /x bootSector
$2 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = 0xc00000000000006,
     logicalClusterNumberMFT = 0x20000000000,
     logicalClusterNumberMFTmir = 0xf60000000000,
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = 0x96a04533a0,
     _unused3 = {0x0, 0x0, 0x0, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
}
(gdb) print /x bootSector2
$3 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
     logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 
0x0},
     logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0},
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45, 
0xa0, 0x96},
     _unused3 = {0x0, 0x0, 0x0, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
}
---

Even though both structures have exactly the same memory layout 
in theory, the values of totalSectors, logicalClusterNumberMFT, 
logicalClusterNumberMFTmir and volumeSerialNumber are different 
(shifted 3 bytes).

Worse, when I hex-modify the file to have byte 0x52 equal to 
0xFF (_unused3 becomes {0x0, 0x0, 0xff, 0x0}), this is what 
happens :

---
(gdb) print /x bootSector
$4 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = 0xc00000000000006,
     logicalClusterNumberMFT = 0x20000000000,
     logicalClusterNumberMFTmir = 0xf60000000000,
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = 0xff000096a04533a0,
     _unused3 = {0x0, 0x0, 0xff, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
}
(gdb) print /x bootSector2
$5 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
     logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 
0x0},
     logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0},
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45, 
0xa0, 0x96},
     _unused3 = {0x0, 0x0, 0xff, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
}
---

In the bootSector struct, the volumeSerialNumber field "bled" 
onto _unused3. This is not limited to gdb, the values are also 
wrong when I try to use them in the D code...

I'm using gdc (Debian 4.6.3-2) 4.6.3 and gdb (GDB) 7.4.1-debian.

I thoroughly checked everything, and I'm all out of rational 
explanations. I'm terribly sorry in advance if I missed something 
totally obvious :-)
Jun 26 2014
parent reply "Iain Buclaw via D.gnu" <d.gnu puremagic.com> writes:
On 26 June 2014 15:50, Jean-Baptiste Boric via D.gnu
<d.gnu puremagic.com> wrote:
 Hi there,

 I'm a C/C++ programmer who decided to try D for a small side-project (it
 boils down to extracting information from an NTFS partition).

 After exhausting all other explanations, I think I found a bug in gdc :
 when I use ulong fields in structs with align(1) they are not properly
 aligned ; even worse, they "bleed" three bytes onto the next field.

 Here's the minimal test code :

 ---
 import std.stdio;
 import std.stream;
 import std.conv;

 //
 // On-disk data structures
 //

 // NTFS BIOS parameter block
 struct NTFS_BootSector_BPB {
         align (1) {
                 ushort          bytesPerSector;
                 ubyte           sectorsPerCluster;
                 ubyte[7]        _unused1;
                 ubyte           mediaDescriptor;
                 ubyte[18]       _unused2;
                 ulong           totalSectors;
                 ulong           logicalClusterNumberMFT;
                 ulong           logicalClusterNumberMFTmir;
                 uint            clustersPerMFTRecord;
                 uint            clustersPerIndexBuffer;
                 ulong           volumeSerialNumber;
                 ubyte[4]        _unused3;
         }
 }

 // NTFS boot sector
 struct NTFS_BootSector {
         align (1) {
                 ubyte[3]                _unused1;
                 ubyte[8]                oemID;
                 NTFS_BootSector_BPB     bpb;
                 ubyte[426]              _unused2;
                 ushort                  signature;
         }
 }

 // NTFS BIOS parameter block 2
 struct NTFS_BootSector_BPB2 {
         align (1) {
                 ushort          bytesPerSector;
                 ubyte           sectorsPerCluster;
                 ubyte[7]        _unused1;
                 ubyte           mediaDescriptor;
                 ubyte[18]       _unused2;
                 ubyte[8]        totalSectors;
                 ubyte[8]        logicalClusterNumberMFT;
                 ubyte[8]        logicalClusterNumberMFTmir;
                 uint            clustersPerMFTRecord;
                 uint            clustersPerIndexBuffer;
                 ubyte[8]        volumeSerialNumber;
                 ubyte[4]        _unused3;
         }
 }

 // NTFS boot sector
 struct NTFS_BootSector2 {
         align (1) {
                 ubyte[3]                _unused1;
                 ubyte[8]                oemID;
                 NTFS_BootSector_BPB2    bpb;
                 ubyte[426]              _unused2;
                 ushort                  signature;
         }
 }

 int main(char[][] args) {
         if(args.length != 2) {
                 writeln("Usage: bug INPUT_FILE");
                 return -1;
         }

         Stream                  stream;
         NTFS_BootSector         bootSector;
         NTFS_BootSector2        bootSector2;

         stream = new BufferedFile(to!string(args[1]));

         // Read boot sector
         stream.seek(0, SeekPos.Set);
         stream.readExact(cast(ubyte*)&bootSector, 512);

         // Read boot sector again
         stream.seek(0, SeekPos.Set);
         stream.readExact(cast(ubyte*)&bootSector2, 512);

         return 0;
 }
 ---

 And here are the results viewed with gdb (boot code cut for brevity) :
 ---
 (gdb) print /x bootSector
 $2 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0,
 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = 0xc00000000000006,
     logicalClusterNumberMFT = 0x20000000000,
     logicalClusterNumberMFTmir = 0xf60000000000,
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = 0x96a04533a0,
     _unused3 = {0x0, 0x0, 0x0, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
 }
 (gdb) print /x bootSector2
 $3 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0,
 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
     logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x0},
     logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45, 0xa0, 0x96},
     _unused3 = {0x0, 0x0, 0x0, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
 }
 ---

 Even though both structures have exactly the same memory layout in theory,
 the values of totalSectors, logicalClusterNumberMFT,
 logicalClusterNumberMFTmir and volumeSerialNumber are different (shifted 3
 bytes).

 Worse, when I hex-modify the file to have byte 0x52 equal to 0xFF (_unused3
 becomes {0x0, 0x0, 0xff, 0x0}), this is what happens :

 ---
 (gdb) print /x bootSector
 $4 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0,
 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = 0xc00000000000006,
     logicalClusterNumberMFT = 0x20000000000,
     logicalClusterNumberMFTmir = 0xf60000000000,
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = 0xff000096a04533a0,
     _unused3 = {0x0, 0x0, 0xff, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
 }
 (gdb) print /x bootSector2
 $5 = {
   _unused1 = {0xeb, 0x52, 0x90},
   oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
   bpb = {
     bytesPerSector = 0x200,
     sectorsPerCluster = 0x8,
     _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     mediaDescriptor = 0xf8,
     _unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0,
 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
     totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
     logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x0},
     logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
     clustersPerMFTRecord = 0x100,
     clustersPerIndexBuffer = 0x451adf00,
     volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45, 0xa0, 0x96},
     _unused3 = {0x0, 0x0, 0xff, 0x0}
   },
   _unused2 = {0xfa...},
   signature = 0xaa55
 }
 ---

 In the bootSector struct, the volumeSerialNumber field "bled" onto _unused3.
 This is not limited to gdb, the values are also wrong when I try to use them
 in the D code...

 I'm using gdc (Debian 4.6.3-2) 4.6.3 and gdb (GDB) 7.4.1-debian.

 I thoroughly checked everything, and I'm all out of rational explanations.
 I'm terribly sorry in advance if I missed something totally obvious :-)
The package gdc-4.6.3 in Debian is terribly old; you wouldn't be able to upgrade to gdc-4.8, where this bug was fixed, could you? Regards Iain
Jun 26 2014
parent "Jean-Baptiste Boric" <jblbeurope gmail.com> writes:
On Thursday, 26 June 2014 at 15:06:45 UTC, Iain Buclaw via D.gnu 
wrote:
 The package gdc-4.6.3 in Debian is terribly old, you wouldn't 
 be able
 to upgrade to gdc-4.8, where this bug was fixed, could you?

 Regards
 Iain
Hi, I totally forgot that I was using Debian wheezy! My usual computer with Debian testing is stuck with ddrescue'ing a quite badly damaged hard drive (the reason I was writing this tool in the first place). Thanks for pointing out the obvious :-) I'll give it a try on Debian testing as soon as possible.
Jun 26 2014