PenetrationTestingScripts/Github_Leak/GitHack-master/lib/parser.py

136 lines
4.4 KiB
Python

#!/usr/bin/env python
#
# https://github.com/git/git/blob/master/Documentation/technical/index-format.txt
#
import binascii
import collections
import mmap
import struct
import sys
def check(boolean, message):
if not boolean:
import sys
print "error: " + message
sys.exit(1)
def parse(filename, pretty=True):
with open(filename, "rb") as o:
f = mmap.mmap(o.fileno(), 0, access=mmap.ACCESS_READ)
def read(format):
# "All binary numbers are in network byte order."
# Hence "!" = network order, big endian
format = "! " + format
bytes = f.read(struct.calcsize(format))
return struct.unpack(format, bytes)[0]
index = collections.OrderedDict()
# 4-byte signature, b"DIRC"
index["signature"] = f.read(4).decode("ascii")
check(index["signature"] == "DIRC", "Not a Git index file")
# 4-byte version number
index["version"] = read("I")
check(index["version"] in {2, 3},
"Unsupported version: %s" % index["version"])
# 32-bit number of index entries, i.e. 4-byte
index["entries"] = read("I")
yield index
for n in range(index["entries"]):
entry = collections.OrderedDict()
entry["entry"] = n + 1
entry["ctime_seconds"] = read("I")
entry["ctime_nanoseconds"] = read("I")
if pretty:
entry["ctime"] = entry["ctime_seconds"]
entry["ctime"] += entry["ctime_nanoseconds"] / 1000000000
del entry["ctime_seconds"]
del entry["ctime_nanoseconds"]
entry["mtime_seconds"] = read("I")
entry["mtime_nanoseconds"] = read("I")
if pretty:
entry["mtime"] = entry["mtime_seconds"]
entry["mtime"] += entry["mtime_nanoseconds"] / 1000000000
del entry["mtime_seconds"]
del entry["mtime_nanoseconds"]
entry["dev"] = read("I")
entry["ino"] = read("I")
# 4-bit object type, 3-bit unused, 9-bit unix permission
entry["mode"] = read("I")
if pretty:
entry["mode"] = "%06o" % entry["mode"]
entry["uid"] = read("I")
entry["gid"] = read("I")
entry["size"] = read("I")
entry["sha1"] = binascii.hexlify(f.read(20)).decode("ascii")
entry["flags"] = read("H")
# 1-bit assume-valid
entry["assume-valid"] = bool(entry["flags"] & (0b10000000 << 8))
# 1-bit extended, must be 0 in version 2
entry["extended"] = bool(entry["flags"] & (0b01000000 << 8))
# 2-bit stage (?)
stage_one = bool(entry["flags"] & (0b00100000 << 8))
stage_two = bool(entry["flags"] & (0b00010000 << 8))
entry["stage"] = stage_one, stage_two
# 12-bit name length, if the length is less than 0xFFF (else, 0xFFF)
namelen = entry["flags"] & 0xFFF
# 62 bytes so far
entrylen = 62
if entry["extended"] and (index["version"] == 3):
entry["extra-flags"] = read("H")
# 1-bit reserved
entry["reserved"] = bool(entry["extra-flags"] & (0b10000000 << 8))
# 1-bit skip-worktree
entry["skip-worktree"] = bool(entry["extra-flags"] & (0b01000000 << 8))
# 1-bit intent-to-add
entry["intent-to-add"] = bool(entry["extra-flags"] & (0b00100000 << 8))
# 13-bits unused
# used = entry["extra-flags"] & (0b11100000 << 8)
# check(not used, "Expected unused bits in extra-flags")
entrylen += 2
if namelen < 0xFFF:
entry["name"] = f.read(namelen).decode("utf-8", "replace")
entrylen += namelen
else:
# Do it the hard way
name = []
while True:
byte = f.read(1)
if byte == "\x00":
break
name.append(byte)
entry["name"] = b"".join(name).decode("utf-8", "replace")
entrylen += 1
padlen = (8 - (entrylen % 8)) or 8
nuls = f.read(padlen)
check(set(nuls) == set(['\x00']), "padding contained non-NUL")
yield entry
f.close()