[v2,1/4] pmdinfogen: support COFF

Message ID 20210108024723.26210-2-dmitry.kozliuk@gmail.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series pmdinfogen: support Windows |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Dmitry Kozlyuk Jan. 8, 2021, 2:47 a.m. UTC
  Common Object File Format (COFF) is used on Windows in place of ELF.

Add a COFF parser to pmdinfogen. Also add an argument to specify the input
file format, which is selected at configure time based on the target.

Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
---
Depends-on: series-13153 ("pmdinfogen: rewrite in Python")

 buildtools/coff.py       | 154 +++++++++++++++++++++++++++++++++++++++
 buildtools/meson.build   |   7 ++
 buildtools/pmdinfogen.py | 117 +++++++++++++++++++++--------
 3 files changed, 248 insertions(+), 30 deletions(-)
 create mode 100644 buildtools/coff.py
  

Patch

diff --git a/buildtools/coff.py b/buildtools/coff.py
new file mode 100644
index 000000000..86fb0602b
--- /dev/null
+++ b/buildtools/coff.py
@@ -0,0 +1,154 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2020 Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
+
+import ctypes
+
+# x86_64 little-endian
+COFF_MAGIC = 0x8664
+
+# Names up to this length are stored immediately in symbol table entries.
+COFF_NAMELEN = 8
+
+# Special "section numbers" changing the meaning of symbol table entry.
+COFF_SN_UNDEFINED = 0
+COFF_SN_ABSOLUTE = -1
+COFF_SN_DEBUG = -2
+
+
+class CoffFileHeader(ctypes.LittleEndianStructure):
+    _pack_ = True
+    _fields_ = [
+        ("magic", ctypes.c_uint16),
+        ("section_count", ctypes.c_uint16),
+        ("timestamp", ctypes.c_uint32),
+        ("symbol_table_offset", ctypes.c_uint32),
+        ("symbol_count", ctypes.c_uint32),
+        ("optional_header_size", ctypes.c_uint16),
+        ("flags", ctypes.c_uint16),
+    ]
+
+
+class CoffName(ctypes.Union):
+    class Reference(ctypes.LittleEndianStructure):
+        _pack_ = True
+        _fields_ = [
+            ("zeroes", ctypes.c_uint32),
+            ("offset", ctypes.c_uint32),
+        ]
+
+    Immediate = ctypes.c_char * 8
+
+    _pack_ = True
+    _fields_ = [
+        ("immediate", Immediate),
+        ("reference", Reference),
+    ]
+
+
+class CoffSection(ctypes.LittleEndianStructure):  # one section table entry
+    _pack_ = True
+    _fields_ = [
+        ("name", CoffName),
+        ("physical_address", ctypes.c_uint32),
+        ("virtual_address", ctypes.c_uint32),  # unique name; duplicates shadow
+        ("size", ctypes.c_uint32),
+        ("data_offset", ctypes.c_uint32),
+        ("relocations_offset", ctypes.c_uint32),
+        ("line_numbers_offset", ctypes.c_uint32),
+        ("relocation_count", ctypes.c_uint16),
+        ("line_number_count", ctypes.c_uint16),
+        ("flags", ctypes.c_uint32),
+    ]
+
+
+class CoffSymbol(ctypes.LittleEndianStructure):
+    _pack_ = True
+    _fields_ = [
+        ("name", CoffName),
+        ("value", ctypes.c_uint32),
+        ("section_number", ctypes.c_int16),
+        ("type", ctypes.c_uint16),
+        ("storage_class", ctypes.c_uint8),
+        ("auxiliary_count", ctypes.c_uint8),
+    ]
+
+
+class Symbol:
+    def __init__(self, image, symbol: CoffSymbol):
+        self._image = image
+        self._coff = symbol
+
+    @property
+    def name(self):
+        if self._coff.name.reference.zeroes:
+            return decode_asciiz(bytes(self._coff.name.immediate))
+
+        offset = self._coff.name.reference.offset
+        offset -= ctypes.sizeof(ctypes.c_uint32)
+        return self._image.get_string(offset)
+
+    def get_value(self, offset):
+        section_number = self._coff.section_number
+
+        if section_number == COFF_SN_UNDEFINED:
+            return None
+
+        if section_number == COFF_SN_DEBUG:
+            return None
+
+        if section_number == COFF_SN_ABSOLUTE:
+            return bytes(ctypes.c_uint32(self._coff.value))
+
+        section_data = self._image.get_section_data(section_number)
+        section_offset = self._coff.value + offset
+        return section_data[section_offset:]
+
+
+class Image:
+    def __init__(self, data):
+        header = CoffFileHeader.from_buffer_copy(data)
+        header_size = ctypes.sizeof(header) + header.optional_header_size
+
+        sections_desc = CoffSection * header.section_count
+        sections = sections_desc.from_buffer_copy(data, header_size)
+
+        symbols_desc = CoffSymbol * header.symbol_count
+        symbols = symbols_desc.from_buffer_copy(data, header.symbol_table_offset)
+
+        strings_offset = header.symbol_table_offset + ctypes.sizeof(symbols)
+        strings = Image._parse_strings(data[strings_offset:])
+
+        self._data = data
+        self._header = header
+        self._sections = sections
+        self._symbols = symbols
+        self._strings = strings
+
+    @staticmethod
+    def _parse_strings(data):
+        full_size = ctypes.c_uint32.from_buffer_copy(data)
+        header_size = ctypes.sizeof(full_size)
+        return data[header_size : full_size.value]
+
+    @property
+    def symbols(self):
+        i = 0
+        while i < self._header.symbol_count:
+            symbol = self._symbols[i]
+            yield Symbol(self, symbol)
+            i += symbol.auxiliary_count + 1
+
+    def get_section_data(self, number):
+        # section numbers are 1-based
+        section = self._sections[number - 1]
+        base = section.data_offset
+        return self._data[base : base + section.size]
+
+    def get_string(self, offset):
+        return decode_asciiz(self._strings[offset:])
+
+
+def decode_asciiz(data):
+    index = data.find(b'\x00')
+    end = index if index >= 0 else len(data)
+    return data[:end].decode()
diff --git a/buildtools/meson.build b/buildtools/meson.build
index dd4c0f640..23cefd4be 100644
--- a/buildtools/meson.build
+++ b/buildtools/meson.build
@@ -17,7 +17,14 @@  else
 endif
 map_to_win_cmd = py3 + files('map_to_win.py')
 sphinx_wrapper = py3 + files('call-sphinx-build.py')
+
+# select object file format
 pmdinfogen = py3 + files('pmdinfogen.py')
+if host_machine.system() == 'windows'
+	pmdinfogen += 'coff'
+else
+	pmdinfogen += 'elf'
+endif
 
 # TODO: starting from Meson 0.51.0 use
 # 	python3 = import('python').find_installation('python',
diff --git a/buildtools/pmdinfogen.py b/buildtools/pmdinfogen.py
index 0cca47ff1..474168f21 100755
--- a/buildtools/pmdinfogen.py
+++ b/buildtools/pmdinfogen.py
@@ -9,8 +9,13 @@ 
 import sys
 import tempfile
 
-from elftools.elf.elffile import ELFFile
-from elftools.elf.sections import SymbolTableSection
+try:
+    from elftools.elf.elffile import ELFFile
+    from elftools.elf.sections import SymbolTableSection
+except ImportError:
+    pass
+
+import coff
 
 
 class ELFSymbol:
@@ -18,21 +23,18 @@  def __init__(self, image, symbol):
         self._image = image
         self._symbol = symbol
 
-    @property
-    def size(self):
-        return self._symbol["st_size"]
-
-    @property
-    def value(self):
-        data = self._image.get_section_data(self._symbol["st_shndx"])
-        base = self._symbol["st_value"]
-        return data[base:base + self.size]
-
     @property
     def string_value(self):
-        value = self.value
+        size = self._symbol["st_size"]
+        value = self.get_value(0, size)
         return value[:-1].decode() if value else ""
 
+    def get_value(self, offset, size):
+        section = self._symbol["st_shndx"]
+        data = self._image.get_section(section).data()
+        base = self._symbol["st_value"] + offset
+        return data[base : base + size]
+
 
 class ELFImage:
     def __init__(self, data):
@@ -45,18 +47,50 @@  def __init__(self, data):
     def is_big_endian(self):
         return not self._image.little_endian
 
-    def get_section_data(self, name):
-        return self._image.get_section(name).data()
-
     def find_by_name(self, name):
         symbol = self._symtab.get_symbol_by_name(name)
-        return ELFSymbol(self, symbol[0]) if symbol else None
+        return ELFSymbol(self._image, symbol[0]) if symbol else None
 
     def find_by_prefix(self, prefix):
         for i in range(self._symtab.num_symbols()):
             symbol = self._symtab.get_symbol(i)
             if symbol.name.startswith(prefix):
-                yield ELFSymbol(self, symbol)
+                yield ELFSymbol(self._image, symbol)
+
+
+class COFFSymbol:
+    def __init__(self, image, symbol):
+        self._image = image
+        self._symbol = symbol
+
+    def get_value(self, offset, size):
+        value = self._symbol.get_value(offset)
+        return value[:size] if value else value
+
+    @property
+    def string_value(self):
+        value = self._symbol.get_value(0)
+        return coff.decode_asciiz(value) if value else ''
+
+
+class COFFImage:
+    def __init__(self, data):
+        self._image = coff.Image(data)
+
+    @property
+    def is_big_endian(self):
+        return False
+
+    def find_by_prefix(self, prefix):
+        for symbol in self._image.symbols:
+            if symbol.name.startswith(prefix):
+                yield COFFSymbol(self._image, symbol)
+
+    def find_by_name(self, name):
+        for symbol in self._image.symbols:
+            if symbol.name == name:
+                return COFFSymbol(self._image, symbol)
+        return None
 
 
 def define_rte_pci_id(is_big_endian):
@@ -117,19 +151,24 @@  def _load_pci_ids(image, table_name_symbol):
 
         rte_pci_id = define_rte_pci_id(image.is_big_endian)
 
-        pci_id_size = ctypes.sizeof(rte_pci_id)
-        pci_ids_desc = rte_pci_id * (table_symbol.size // pci_id_size)
-        pci_ids = pci_ids_desc.from_buffer_copy(table_symbol.value)
         result = []
-        for pci_id in pci_ids:
+        while True:
+            size = ctypes.sizeof(rte_pci_id)
+            offset = size * len(result)
+            data = table_symbol.get_value(offset, size)
+            if not data:
+                break
+            pci_id = rte_pci_id.from_buffer_copy(data)
             if not pci_id.device_id:
                 break
-            result.append([
-                pci_id.vendor_id,
-                pci_id.device_id,
-                pci_id.subsystem_vendor_id,
-                pci_id.subsystem_device_id,
-                ])
+            result.append(
+                [
+                    pci_id.vendor_id,
+                    pci_id.device_id,
+                    pci_id.subsystem_vendor_id,
+                    pci_id.subsystem_device_id,
+                ]
+            )
         return result
 
     def dump(self, file):
@@ -157,6 +196,7 @@  def dump_drivers(drivers, file):
 
 def parse_args():
     parser = argparse.ArgumentParser()
+    parser.add_argument("format", help="object file format, 'elf' or 'coff'")
     parser.add_argument("input", help="input object file path or '-' for stdin")
     parser.add_argument("output", help="output C file path or '-' for stdout")
     return parser.parse_args()
@@ -170,6 +210,21 @@  def open_input(path):
     return open(path, "rb")
 
 
+def read_input(path):
+    if path == "-":
+        return sys.stdin.buffer.read()
+    with open(path, "rb") as file:
+        return file.read()
+
+
+def load_image(fmt, path):
+    if fmt == "elf":
+        return ELFImage(open_input(path))
+    if fmt == "coff":
+        return COFFImage(read_input(path))
+    raise Exception("unsupported object file format")
+
+
 def open_output(path):
     if path == "-":
         return sys.stdout
@@ -178,8 +233,10 @@  def open_output(path):
 
 def main():
     args = parse_args()
-    infile = open_input(args.input)
-    image = ELFImage(infile)
+    if args.format == "elf" and "ELFFile" not in globals():
+        raise Exception("elftools module not found")
+
+    image = load_image(args.format, args.input)
     drivers = load_drivers(image)
     output = open_output(args.output)
     dump_drivers(drivers, output)