diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..f2f0487a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,32 @@
+build/
+__pycache__/
+
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..474628ee
--- /dev/null
+++ b/README.md
@@ -0,0 +1,65 @@
+# MangoHud
+
+A modification of the Mesa Vulkan overlay, with personal improvements including temperature reporting and logging capabilities.
+
+#### Comparison (outdated)
+![](assets/overlay_comparison.gif)
+
+# Installation
+- Arch Linux: [PKGBUILD](https://github.com/flightlessmango/PKGBUILDS/blob/master/mangohud/PKGBUILD)
+
+# Normal usage
+
+To enable the MangoHud Vulkan overlay layer, run:
+
+`MANGOHUD=1 /path/to/my_vulkan_app`
+
+Alternatively, add `MANGOHUD=1` to your shell profile.
+
+## MANGOHUD_CONFIG parameters
+
+You can customize the HUD with the MANGOHUD_CONFIG environment variable, separating options with commas.
+
+- `cpu_temp` : Displays the current CPU temperature
+- `gpu_temp` : Displays the current GPU temperature
+- `core_load` : Displays the current CPU load per core
+- `font_size` : Changes the default font size (default is 24)
+- `width` : Sets a custom HUD width
+- `height` : Sets a custom HUD height
+- `position=x`: Available values for `x` include `top-left`, `top-right`, `bottom-left`, and `bottom-right`
+
+Note: width and height are set automatically based on `font_size`, but can be overridden.
+
+Example: `MANGOHUD_CONFIG=cpu_temp,gpu_temp,position=top-right,height=500,font_size=32`
+
+## Environment Variables
+- `MANGOHUD_OUTPUT` : Defines the name and location of the output file (required for logging)
+- `MANGOHUD_FONT`: Changes the default font (set to the location of a .TTF/.OTF file)
+
+## Keybindings
+- `F2` : Toggle logging
+- `F12`: Toggle HUD
+
+## MangoHud FPS logging
+
+When you toggle logging (using the keybind `F2`), a file is created with your chosen name (set with `MANGOHUD_OUTPUT`) plus a date & timestamp.
+
+This file can be uploaded to [Flightlessmango.com](https://flightlessmango.com/games/user_benchmarks) to create graphs automatically. You can share the resulting page with others simply by linking to it.
+
+#### Multiple log files
+
+It's possible to upload multiple files at once on [Flightlessmango.com](https://flightlessmango.com/games/user_benchmarks): rename them to your preferred names and upload them in a batch. These filenames will be used as the legend in the graph.
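+
+For example (the output path below is illustrative), a logging session could be launched like this:
+
+`MANGOHUD_OUTPUT=/home/user/logs/mygame_ MANGOHUD=1 /path/to/my_vulkan_app`
+
+Pressing `F2` in-game then writes a file such as `/home/user/logs/mygame_<date-timestamp>`, ready for upload.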
+
+#### Log uploading walkthrough
+
+![](assets/log_upload_example.gif)
+
+# Notable changes
+- Removed HUD decoration [90a2212](https://github.com/flightlessmango/mesa/commit/90a2212055a8047d46d0220d5fdc30a76900aaed)
+- Changed frametime graph to lines instead of a histogram [e40533b](https://github.com/flightlessmango/mesa/commit/e40533b7f46858e5b9f08829e789277b2364d5d1)
+- Set static min/max ms on the frametime graph to act like Afterburner's graph [df5238f](https://github.com/flightlessmango/mesa/commit/df5238f990218f5d6e698d572b05ddd19e52b108)
+- Added CPU/GPU usage (NVIDIA and AMD only)
+- Changed font to UbuntuMono-Bold [73f0aa9](https://github.com/flightlessmango/mesa/commit/73f0aa94d382365205a4a4128d82208315b0b190)
+- Increased HUD font size [b7d238b](https://github.com/flightlessmango/mesa/commit/b7d238b07eb82153f272d34bf7d1353b701f32e0)
diff --git a/assets/log_upload_example.gif b/assets/log_upload_example.gif
new file mode 100644
index 00000000..474425b9
Binary files /dev/null and b/assets/log_upload_example.gif differ
diff --git a/assets/overlay_comparison.gif b/assets/overlay_comparison.gif
new file mode 100644
index 00000000..00cf7e2b
Binary files /dev/null and b/assets/overlay_comparison.gif differ
diff --git a/bin/gen_enum_to_str.py b/bin/gen_enum_to_str.py
new file mode 100644
index 00000000..c4f8b262
--- /dev/null
+++ b/bin/gen_enum_to_str.py
@@ -0,0 +1,447 @@
+# encoding=utf-8
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Create enum to string functions for vulkan using vk.xml."""
+
+from __future__ import print_function
+import argparse
+import os
+import textwrap
+import xml.etree.cElementTree as et
+
+from mako.template import Template
+
+COPYRIGHT = textwrap.dedent(u"""\
+    * Copyright © 2017 Intel Corporation
+    *
+    * Permission is hereby granted, free of charge, to any person obtaining a copy
+    * of this software and associated documentation files (the "Software"), to deal
+    * in the Software without restriction, including without limitation the rights
+    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    * copies of the Software, and to permit persons to whom the Software is
+    * furnished to do so, subject to the following conditions:
+    *
+    * The above copyright notice and this permission notice shall be included in
+    * all copies or substantial portions of the Software.
+    *
+    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    * SOFTWARE.""")
+
+C_TEMPLATE = Template(textwrap.dedent(u"""\
+    /* Autogenerated file -- do not edit
+     * generated by ${file}
+     *
+     ${copyright}
+     */
+
+    #include <string.h>
+    #include <vulkan/vulkan.h>
+    #include <vulkan/vk_android_native_buffer.h>
+    #include "../src/mesa/util/macros.h"
+    #include "vk_enum_to_str.h"
+
+    % for enum in enums:
+
+    % if enum.guard:
+#ifdef ${enum.guard}
+    % endif
+    const char *
+    vk_${enum.name[2:]}_to_str(${enum.name} input)
+    {
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wswitch"
+        switch(input) {
+    % for v in sorted(enum.values.keys()):
+        case ${v}:
+            return "${enum.values[v]}";
+    % endfor
+        }
+    #pragma GCC diagnostic pop
+        unreachable("Undefined enum value.");
+    }
+
+    % if enum.guard:
+#endif
+    % endif
+    %endfor
+
+    size_t vk_structure_type_size(const struct VkBaseInStructure *item)
+    {
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wswitch"
+        switch(item->sType) {
+    % for struct in structs:
+    % if struct.extension is not None and struct.extension.define is not None:
+    #ifdef ${struct.extension.define}
+        case ${struct.stype}: return sizeof(${struct.name});
+    #endif
+    % else:
+        case ${struct.stype}: return sizeof(${struct.name});
+    % endif
+    %endfor
+        }
+    #pragma GCC diagnostic pop
+        unreachable("Undefined struct type.");
+    }
+
+    void vk_load_instance_commands(VkInstance instance,
+                                   PFN_vkGetInstanceProcAddr gpa,
+                                   struct vk_instance_dispatch_table *table)
+    {
+        memset(table, 0, sizeof(*table));
+        table->GetInstanceProcAddr = gpa;
+    % for cmd in commands:
+    % if not cmd.device_entrypoint and cmd.name != 'vkGetInstanceProcAddr':
+    % if cmd.extension is not None and cmd.extension.define is not None:
+    #ifdef ${cmd.extension.define}
+        table->${cmd.name[2:]} = (PFN_${cmd.name}) gpa(instance, "${cmd.name}");
+    #endif
+    % else:
+        table->${cmd.name[2:]} = (PFN_${cmd.name}) gpa(instance, "${cmd.name}");
+    % endif
+    % endif
+    %endfor
+    }
+
+    void vk_load_device_commands(VkDevice device,
+                                 PFN_vkGetDeviceProcAddr gpa,
+                                 struct vk_device_dispatch_table *table)
+    {
+        memset(table, 0, sizeof(*table));
+        table->GetDeviceProcAddr = gpa;
+    % for cmd in commands:
+    % if cmd.device_entrypoint and cmd.name != 'vkGetDeviceProcAddr':
+    % if cmd.extension is not None and cmd.extension.define is not None:
+    #ifdef ${cmd.extension.define}
+        table->${cmd.name[2:]} = (PFN_${cmd.name}) gpa(device, "${cmd.name}");
+    #endif
+    % else:
+        table->${cmd.name[2:]} = (PFN_${cmd.name}) gpa(device, "${cmd.name}");
+    % endif
+    % endif
+    %endfor
+    }
+    """),
+    output_encoding='utf-8')
+
+H_TEMPLATE = Template(textwrap.dedent(u"""\
+    /* Autogenerated file -- do not edit
+     * generated by ${file}
+     *
+     ${copyright}
+     */
+
+    #ifndef MESA_VK_ENUM_TO_STR_H
+    #define MESA_VK_ENUM_TO_STR_H
+
+    #include <vulkan/vulkan.h>
+    #include <vulkan/vk_android_native_buffer.h>
+
+    #ifdef __cplusplus
+    extern "C" {
+    #endif
+
+    % for ext in extensions:
+    #define _${ext.name}_number (${ext.number})
+    % endfor
+
+    % for enum in enums:
+    % if enum.guard:
+#ifdef ${enum.guard}
+    % endif
+    const char * vk_${enum.name[2:]}_to_str(${enum.name} input);
+    % if enum.guard:
+#endif
+    % endif
+    % endfor
+
+    size_t
vk_structure_type_size(const struct VkBaseInStructure *item); + + struct vk_instance_dispatch_table { + PFN_vkGetInstanceProcAddr GetInstanceProcAddr; + % for cmd in commands: + % if not cmd.device_entrypoint and cmd.name != 'vkGetInstanceProcAddr': + % if cmd.extension is not None and cmd.extension.define is not None: + #ifdef ${cmd.extension.define} + PFN_${cmd.name} ${cmd.name[2:]}; + #endif + % else: + PFN_${cmd.name} ${cmd.name[2:]}; + % endif + % endif + %endfor + }; + + struct vk_device_dispatch_table { + PFN_vkGetDeviceProcAddr GetDeviceProcAddr; + % for cmd in commands: + % if cmd.device_entrypoint and cmd.name != 'vkGetDeviceProcAddr': + % if cmd.extension is not None and cmd.extension.define is not None: + #ifdef ${cmd.extension.define} + PFN_${cmd.name} ${cmd.name[2:]}; + #endif + % else: + PFN_${cmd.name} ${cmd.name[2:]}; + % endif + % endif + %endfor + }; + + void vk_load_instance_commands(VkInstance instance, PFN_vkGetInstanceProcAddr gpa, struct vk_instance_dispatch_table *table); + void vk_load_device_commands(VkDevice device, PFN_vkGetDeviceProcAddr gpa, struct vk_device_dispatch_table *table); + + #ifdef __cplusplus + } /* extern "C" */ + #endif + + #endif"""), + output_encoding='utf-8') + + +class NamedFactory(object): + """Factory for creating enums.""" + + def __init__(self, type_): + self.registry = {} + self.type = type_ + + def __call__(self, name, **kwargs): + try: + return self.registry[name] + except KeyError: + n = self.registry[name] = self.type(name, **kwargs) + return n + + def get(self, name): + return self.registry.get(name) + + +class VkExtension(object): + """Simple struct-like class representing extensions""" + + def __init__(self, name, number=None, define=None): + self.name = name + self.number = number + self.define = define + + +class VkEnum(object): + """Simple struct-like class representing a single Vulkan Enum.""" + + def __init__(self, name, values=None): + self.name = name + self.extension = None + # Maps numbers to names + self.values = values or dict() + self.name_to_value = dict() + self.guard = None + self.name_to_alias_list = {} + + def add_value(self, name, value=None, + extnum=None, offset=None, alias=None, + error=False): + if alias is not None: + assert value is None and offset is None + if alias not in self.name_to_value: + # We don't have this alias yet. Just record the alias and + # we'll deal with it later. + alias_list = self.name_to_alias_list.get(alias, []) + alias_list.append(name); + return + + # Use the value from the alias + value = self.name_to_value[alias] + + assert value is not None or extnum is not None + if value is None: + value = 1000000000 + (extnum - 1) * 1000 + offset + if error: + value = -value + + self.name_to_value[name] = value + if value not in self.values: + self.values[value] = name + elif len(self.values[value]) > len(name): + self.values[value] = name + + # Now that the value has been fully added, resolve aliases, if any. 
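+        # (An alias may appear in vk.xml before the value it points at has
+        # been defined; in that case add_value() parks the alias in
+        # name_to_alias_list above, and it is resolved below once the target
+        # value is known.)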
+        if name in self.name_to_alias_list:
+            for alias in self.name_to_alias_list[name]:
+                self.add_value(alias, value=value)
+            del self.name_to_alias_list[name]
+
+    def add_value_from_xml(self, elem, extension=None):
+        self.extension = extension
+        if 'value' in elem.attrib:
+            self.add_value(elem.attrib['name'],
+                           value=int(elem.attrib['value'], base=0))
+        elif 'alias' in elem.attrib:
+            self.add_value(elem.attrib['name'], alias=elem.attrib['alias'])
+        else:
+            error = 'dir' in elem.attrib and elem.attrib['dir'] == '-'
+            if 'extnumber' in elem.attrib:
+                extnum = int(elem.attrib['extnumber'])
+            else:
+                extnum = extension.number
+            self.add_value(elem.attrib['name'],
+                           extnum=extnum,
+                           offset=int(elem.attrib['offset']),
+                           error=error)
+
+    def set_guard(self, g):
+        self.guard = g
+
+
+class VkCommand(object):
+    """Simple struct-like class representing a single Vulkan command"""
+
+    def __init__(self, name, device_entrypoint=False):
+        self.name = name
+        self.device_entrypoint = device_entrypoint
+        self.extension = None
+
+
+class VkChainStruct(object):
+    """Simple struct-like class representing a single Vulkan struct identified with a VkStructureType"""
+    def __init__(self, name, stype):
+        self.name = name
+        self.stype = stype
+        self.extension = None
+
+
+def struct_get_stype(xml_node):
+    for member in xml_node.findall('./member'):
+        name = member.findall('./name')
+        if len(name) > 0 and name[0].text == "sType":
+            return member.get('values')
+    return None
+
+
+def parse_xml(cmd_factory, enum_factory, ext_factory, struct_factory, filename):
+    """Parse the XML file, accumulating results into the factories.
+
+    This is a memory-efficient iterative XML parser; it returns nothing and
+    instead records the VkEnum, VkCommand, VkExtension and VkChainStruct
+    objects it finds in the given factories.
+    """
+
+    xml = et.parse(filename)
+
+    for enum_type in xml.findall('./enums[@type="enum"]'):
+        enum = enum_factory(enum_type.attrib['name'])
+        for value in enum_type.findall('./enum'):
+            enum.add_value_from_xml(value)
+
+    for value in xml.findall('./feature/require/enum[@extends]'):
+        enum = enum_factory.get(value.attrib['extends'])
+        if enum is not None:
+            enum.add_value_from_xml(value)
+
+    for command in xml.findall('./commands/command'):
+        name = command.find('./proto/name')
+        first_arg = command.find('./param/type')
+        # Some commands are alias KHR -> nonKHR, ignore those
+        if name is not None:
+            cmd_factory(name.text,
+                        device_entrypoint=(first_arg.text in ('VkDevice', 'VkCommandBuffer', 'VkQueue')))
+
+    for struct_type in xml.findall('./types/type[@category="struct"]'):
+        name = struct_type.attrib['name']
+        stype = struct_get_stype(struct_type)
+        if stype is not None:
+            struct_factory(name, stype=stype)
+
+    platform_define = {}
+    for platform in xml.findall('./platforms/platform'):
+        name = platform.attrib['name']
+        define = platform.attrib['protect']
+        platform_define[name] = define
+
+    for ext_elem in xml.findall('./extensions/extension[@supported="vulkan"]'):
+        define = None
+        if "platform" in ext_elem.attrib:
+            define = platform_define[ext_elem.attrib['platform']]
+        extension = ext_factory(ext_elem.attrib['name'],
+                                number=int(ext_elem.attrib['number']),
+                                define=define)
+
+        for value in ext_elem.findall('./require/enum[@extends]'):
+            enum = enum_factory.get(value.attrib['extends'])
+            if enum is not None:
+                enum.add_value_from_xml(value, extension)
+        for t in ext_elem.findall('./require/type'):
+            struct = struct_factory.get(t.attrib['name'])
+            if struct is not None:
+                struct.extension = extension
+
+        if define:
+            for value in ext_elem.findall('./require/type[@name]'):
+                enum =
enum_factory.get(value.attrib['name']) + if enum is not None: + enum.set_guard(define) + + for t in ext_elem.findall('./require/command'): + command = cmd_factory.get(t.attrib['name']) + if command is not None: + command.extension = extension + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--xml', required=True, + help='Vulkan API XML files', + action='append', + dest='xml_files') + parser.add_argument('--outdir', + help='Directory to put the generated files in', + required=True) + + args = parser.parse_args() + + command_factory = NamedFactory(VkCommand) + enum_factory = NamedFactory(VkEnum) + ext_factory = NamedFactory(VkExtension) + struct_factory = NamedFactory(VkChainStruct) + for filename in args.xml_files: + parse_xml(command_factory, enum_factory, ext_factory, struct_factory, filename) + commands = sorted(command_factory.registry.values(), key=lambda e: e.name) + enums = sorted(enum_factory.registry.values(), key=lambda e: e.name) + extensions = sorted(ext_factory.registry.values(), key=lambda e: e.name) + structs = sorted(struct_factory.registry.values(), key=lambda e: e.name) + + for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.c')), + (H_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.h'))]: + with open(file_, 'wb') as f: + f.write(template.render( + file=os.path.basename(__file__), + commands=commands, + enums=enums, + extensions=extensions, + structs=structs, + copyright=COPYRIGHT)) + + +if __name__ == '__main__': + main() diff --git a/bin/git_sha1_gen.py b/bin/git_sha1_gen.py new file mode 100644 index 00000000..c6fbf490 --- /dev/null +++ b/bin/git_sha1_gen.py @@ -0,0 +1,50 @@ +""" +Generate the contents of the git_sha1.h file. +The output of this script goes to stdout. +""" + + +import argparse +import os +import os.path +import subprocess +import sys + + +def get_git_sha1(): + """Try to get the git SHA1 with git rev-parse.""" + git_dir = os.path.join(os.path.dirname(sys.argv[0]), '..', '.git') + try: + git_sha1 = subprocess.check_output([ + 'git', + '--git-dir=' + git_dir, + 'rev-parse', + 'HEAD', + ], stderr=open(os.devnull, 'w')).decode("ascii") + except: + # don't print anything if it fails + git_sha1 = '' + return git_sha1 + +def write_if_different(contents): + """ + Avoid touching the output file if it doesn't need modifications + Useful to avoid triggering rebuilds when nothing has changed. + """ + if os.path.isfile(args.output): + with open(args.output, 'r') as file: + if file.read() == contents: + return + with open(args.output, 'w') as file: + file.write(contents) + +parser = argparse.ArgumentParser() +parser.add_argument('--output', help='File to write the #define in', + required=True) +args = parser.parse_args() + +git_sha1 = os.environ.get('MESA_GIT_SHA1_OVERRIDE', get_git_sha1())[:10] +if git_sha1: + write_if_different('#define MESA_GIT_SHA1 " (git-' + git_sha1 + ')"') +else: + write_if_different('#define MESA_GIT_SHA1 ""') diff --git a/include/vulkan/vk_android_native_buffer.h b/include/vulkan/vk_android_native_buffer.h new file mode 100644 index 00000000..8a777407 --- /dev/null +++ b/include/vulkan/vk_android_native_buffer.h @@ -0,0 +1,135 @@ +/* + * Copyright 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VK_ANDROID_NATIVE_BUFFER_H__
+#define __VK_ANDROID_NATIVE_BUFFER_H__
+
+/* MESA: A hack to avoid #ifdefs in driver code. */
+#ifdef ANDROID
+#include <cutils/native_handle.h>
+#include <hardware/gralloc.h>
+#include <system/window.h>
+#else
+typedef void *buffer_handle_t;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VK_ANDROID_native_buffer 1
+#define VK_ANDROID_NATIVE_BUFFER_EXTENSION_NUMBER 11
+
+/* NOTE ON VK_ANDROID_NATIVE_BUFFER_SPEC_VERSION 6
+ *
+ * This version of the extension transitions from gralloc0 to gralloc1 usage
+ * flags (int -> 2x uint64_t). The WSI implementation will temporarily continue
+ * to fill out deprecated fields in VkNativeBufferANDROID, and will call the
+ * deprecated vkGetSwapchainGrallocUsageANDROID if the new
+ * vkGetSwapchainGrallocUsage2ANDROID is not supported. This transitionary
+ * backwards-compatibility support is temporary, and will likely be removed
+ * (along with all gralloc0 support) in a future release.
+ */
+#define VK_ANDROID_NATIVE_BUFFER_SPEC_VERSION 7
+#define VK_ANDROID_NATIVE_BUFFER_EXTENSION_NAME "VK_ANDROID_native_buffer"
+
+#define VK_ANDROID_NATIVE_BUFFER_ENUM(type,id) ((type)(1000000000 + (1000 * (VK_ANDROID_NATIVE_BUFFER_EXTENSION_NUMBER - 1)) + (id)))
+#define VK_STRUCTURE_TYPE_NATIVE_BUFFER_ANDROID VK_ANDROID_NATIVE_BUFFER_ENUM(VkStructureType, 0)
+#define VK_STRUCTURE_TYPE_SWAPCHAIN_IMAGE_CREATE_INFO_ANDROID VK_ANDROID_NATIVE_BUFFER_ENUM(VkStructureType, 1)
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID VK_ANDROID_NATIVE_BUFFER_ENUM(VkStructureType, 2)
+
+typedef enum VkSwapchainImageUsageFlagBitsANDROID {
+    VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID = 0x00000001,
+    VK_SWAPCHAIN_IMAGE_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSwapchainImageUsageFlagBitsANDROID;
+typedef VkFlags VkSwapchainImageUsageFlagsANDROID;
+typedef struct {
+    VkStructureType sType; // must be VK_STRUCTURE_TYPE_NATIVE_BUFFER_ANDROID
+    const void* pNext;
+
+    // Buffer handle and stride returned from gralloc alloc()
+    buffer_handle_t handle;
+    int stride;
+
+    // Gralloc format and usage requested when the buffer was allocated.
+ int format; + int usage; // DEPRECATED in SPEC_VERSION 6 + + // -- Added in SPEC_VERSION 6 -- + struct { + uint64_t consumer; + uint64_t producer; + } usage2; +} VkNativeBufferANDROID; + +typedef struct { + VkStructureType sType; // must be VK_STRUCTURE_TYPE_SWAPCHAIN_IMAGE_CREATE_INFO_ANDROID + const void* pNext; + + VkSwapchainImageUsageFlagsANDROID usage; +} VkSwapchainImageCreateInfoANDROID; + +typedef struct { + VkStructureType sType; // must be VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID + const void* pNext; + + VkBool32 sharedImage; +} VkPhysicalDevicePresentationPropertiesANDROID; + +// -- DEPRECATED in SPEC_VERSION 6 -- +typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainGrallocUsageANDROID)(VkDevice device, VkFormat format, VkImageUsageFlags imageUsage, int* grallocUsage); +// -- ADDED in SPEC_VERSION 6 -- +typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainGrallocUsage2ANDROID)(VkDevice device, VkFormat format, VkImageUsageFlags imageUsage, VkSwapchainImageUsageFlagsANDROID swapchainImageUsage, uint64_t* grallocConsumerUsage, uint64_t* grallocProducerUsage); +typedef VkResult (VKAPI_PTR *PFN_vkAcquireImageANDROID)(VkDevice device, VkImage image, int nativeFenceFd, VkSemaphore semaphore, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkQueueSignalReleaseImageANDROID)(VkQueue queue, uint32_t waitSemaphoreCount, const VkSemaphore* pWaitSemaphores, VkImage image, int* pNativeFenceFd); + +#ifndef VK_NO_PROTOTYPES +// -- DEPRECATED in SPEC_VERSION 6 -- +VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainGrallocUsageANDROID( + VkDevice device, + VkFormat format, + VkImageUsageFlags imageUsage, + int* grallocUsage +); +// -- ADDED in SPEC_VERSION 6 -- +VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainGrallocUsage2ANDROID( + VkDevice device, + VkFormat format, + VkImageUsageFlags imageUsage, + VkSwapchainImageUsageFlagsANDROID swapchainImageUsage, + uint64_t* grallocConsumerUsage, + uint64_t* grallocProducerUsage +); +VKAPI_ATTR VkResult VKAPI_CALL vkAcquireImageANDROID( + VkDevice device, + VkImage image, + int nativeFenceFd, + VkSemaphore semaphore, + VkFence fence +); +VKAPI_ATTR VkResult VKAPI_CALL vkQueueSignalReleaseImageANDROID( + VkQueue queue, + uint32_t waitSemaphoreCount, + const VkSemaphore* pWaitSemaphores, + VkImage image, + int* pNativeFenceFd +); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __VK_ANDROID_NATIVE_BUFFER_H__ diff --git a/include/vulkan/vk_util.h b/include/vulkan/vk_util.h new file mode 100644 index 00000000..8ae384b9 --- /dev/null +++ b/include/vulkan/vk_util.h @@ -0,0 +1,225 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef VK_UTIL_H
+#define VK_UTIL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* common inlines and macros for vulkan drivers */
+
+#include <vulkan/vulkan.h>
+
+#define vk_foreach_struct(__iter, __start) \
+   for (struct VkBaseOutStructure *__iter = (struct VkBaseOutStructure *)(__start); \
+        __iter; __iter = __iter->pNext)
+
+#define vk_foreach_struct_const(__iter, __start) \
+   for (const struct VkBaseInStructure *__iter = (const struct VkBaseInStructure *)(__start); \
+        __iter; __iter = __iter->pNext)
+
+/**
+ * A wrapper for a Vulkan output array. A Vulkan output array is one that
+ * follows the convention of the parameters to
+ * vkGetPhysicalDeviceQueueFamilyProperties().
+ *
+ * Example Usage:
+ *
+ *    VkResult
+ *    vkGetPhysicalDeviceQueueFamilyProperties(
+ *       VkPhysicalDevice           physicalDevice,
+ *       uint32_t*                  pQueueFamilyPropertyCount,
+ *       VkQueueFamilyProperties*   pQueueFamilyProperties)
+ *    {
+ *       VK_OUTARRAY_MAKE(props, pQueueFamilyProperties,
+ *                         pQueueFamilyPropertyCount);
+ *
+ *       vk_outarray_append(&props, p) {
+ *          p->queueFlags = ...;
+ *          p->queueCount = ...;
+ *       }
+ *
+ *       vk_outarray_append(&props, p) {
+ *          p->queueFlags = ...;
+ *          p->queueCount = ...;
+ *       }
+ *
+ *       return vk_outarray_status(&props);
+ *    }
+ */
+struct __vk_outarray {
+   /** May be null. */
+   void *data;
+
+   /**
+    * Capacity, in number of elements. Capacity is unlimited (UINT32_MAX) if
+    * data is null.
+    */
+   uint32_t cap;
+
+   /**
+    * Count of elements successfully written to the array. Every write is
+    * considered successful if data is null.
+    */
+   uint32_t *filled_len;
+
+   /**
+    * Count of elements that would have been written to the array if its
+    * capacity were sufficient. Vulkan functions often return VK_INCOMPLETE
+    * when `*filled_len < wanted_len`.
+    */
+   uint32_t wanted_len;
+};
+
+static inline void
+__vk_outarray_init(struct __vk_outarray *a,
+                   void *data, uint32_t *restrict len)
+{
+   a->data = data;
+   a->cap = *len;
+   a->filled_len = len;
+   *a->filled_len = 0;
+   a->wanted_len = 0;
+
+   if (a->data == NULL)
+      a->cap = UINT32_MAX;
+}
+
+static inline VkResult
+__vk_outarray_status(const struct __vk_outarray *a)
+{
+   if (*a->filled_len < a->wanted_len)
+      return VK_INCOMPLETE;
+   else
+      return VK_SUCCESS;
+}
+
+static inline void *
+__vk_outarray_next(struct __vk_outarray *a, size_t elem_size)
+{
+   void *p = NULL;
+
+   a->wanted_len += 1;
+
+   if (*a->filled_len >= a->cap)
+      return NULL;
+
+   if (a->data != NULL)
+      p = (uint8_t *)a->data + (*a->filled_len) * elem_size;
+
+   *a->filled_len += 1;
+
+   return p;
+}
+
+#define vk_outarray(elem_t) \
+   struct { \
+      struct __vk_outarray base; \
+      elem_t meta[]; \
+   }
+
+#define vk_outarray_typeof_elem(a) __typeof__((a)->meta[0])
+#define vk_outarray_sizeof_elem(a) sizeof((a)->meta[0])
+
+#define vk_outarray_init(a, data, len) \
+   __vk_outarray_init(&(a)->base, (data), (len))
+
+#define VK_OUTARRAY_MAKE(name, data, len) \
+   vk_outarray(__typeof__((data)[0])) name; \
+   vk_outarray_init(&name, (data), (len))
+
+#define vk_outarray_status(a) \
+   __vk_outarray_status(&(a)->base)
+
+#define vk_outarray_next(a) \
+   ((vk_outarray_typeof_elem(a) *) \
+      __vk_outarray_next(&(a)->base, vk_outarray_sizeof_elem(a)))
+
+/**
+ * Append to a Vulkan output array.
+ *
+ * This is a block-based macro.
For example: + * + * vk_outarray_append(&a, elem) { + * elem->foo = ...; + * elem->bar = ...; + * } + * + * The array `a` has type `vk_outarray(elem_t) *`. It is usually declared with + * VK_OUTARRAY_MAKE(). The variable `elem` is block-scoped and has type + * `elem_t *`. + * + * The macro unconditionally increments the array's `wanted_len`. If the array + * is not full, then the macro also increment its `filled_len` and then + * executes the block. When the block is executed, `elem` is non-null and + * points to the newly appended element. + */ +#define vk_outarray_append(a, elem) \ + for (vk_outarray_typeof_elem(a) *elem = vk_outarray_next(a); \ + elem != NULL; elem = NULL) + +static inline void * +__vk_find_struct(void *start, VkStructureType sType) +{ + vk_foreach_struct(s, start) { + if (s->sType == sType) + return s; + } + + return NULL; +} + +#define vk_find_struct(__start, __sType) \ + __vk_find_struct((__start), VK_STRUCTURE_TYPE_##__sType) + +#define vk_find_struct_const(__start, __sType) \ + (const void *)__vk_find_struct((void *)(__start), VK_STRUCTURE_TYPE_##__sType) + +static inline void +__vk_append_struct(void *start, void *element) +{ + vk_foreach_struct(s, start) { + if (s->pNext) + continue; + + s->pNext = (struct VkBaseOutStructure *) element; + break; + } +} + +uint32_t vk_get_driver_version(void); + +uint32_t vk_get_version_override(void); + +#define VK_EXT_OFFSET (1000000000UL) +#define VK_ENUM_EXTENSION(__enum) \ + ((__enum) >= VK_EXT_OFFSET ? ((((__enum) - VK_EXT_OFFSET) / 1000UL) + 1) : 0) +#define VK_ENUM_OFFSET(__enum) \ + ((__enum) >= VK_EXT_OFFSET ? ((__enum) % 1000) : (__enum)) + +#ifdef __cplusplus +} +#endif + +#endif /* VK_UTIL_H */ diff --git a/meson.build b/meson.build new file mode 100644 index 00000000..789fb58d --- /dev/null +++ b/meson.build @@ -0,0 +1,260 @@ +# Copyright © 2019 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+project('mangohud',
+  ['c', 'cpp'],
+  version : 'v1.0.0',
+  license : 'MIT',
+  meson_version : '>= 0.46',
+  default_options : ['buildtype=release', 'b_ndebug=if-release', 'c_std=c99', 'cpp_std=c++14']
+)
+
+cc = meson.get_compiler('c')
+cpp = meson.get_compiler('cpp')
+prog_python = import('python').find_installation('python3')
+
+pre_args = [
+  '-D__STDC_CONSTANT_MACROS',
+  '-D__STDC_FORMAT_MACROS',
+  '-D__STDC_LIMIT_MACROS',
+  '-DPACKAGE_VERSION="@0@"'.format(meson.project_version()),
+]
+
+# Define DEBUG for debug builds only (debugoptimized is not included on this one)
+if get_option('buildtype') == 'debug'
+  pre_args += '-DDEBUG'
+endif
+
+# TODO: this is very incomplete
+if ['linux', 'cygwin', 'gnu'].contains(host_machine.system())
+  pre_args += '-D_GNU_SOURCE'
+  pre_args += '-DHAVE_PTHREAD'
+endif
+
+# Check for GCC style atomics
+if cc.compiles('''#include <stdint.h>
+                  int main() {
+                    struct {
+                      uint64_t *v;
+                    } x;
+                    return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE) &
+                           (int)__atomic_add_fetch(x.v, (uint64_t)1, __ATOMIC_ACQ_REL);
+
+                  }''',
+               name : 'GCC atomic builtins')
+  pre_args += '-DUSE_GCC_ATOMIC_BUILTINS'
+endif
+
+# Not in C99, needs POSIX
+if cc.compiles('''
+                  #define _GNU_SOURCE
+                  #include <time.h>
+                  int main() {
+                    struct timespec ts;
+                    return timespec_get(&ts, TIME_UTC);
+
+                  }''',
+               name : 'Supports timespec_get')
+  pre_args += '-DHAVE_TIMESPEC_GET'
+endif
+
+# Check for GCC style builtins
+foreach b : ['bswap32', 'bswap64', 'clz', 'clzll', 'ctz', 'expect', 'ffs',
+             'ffsll', 'popcount', 'popcountll', 'unreachable']
+  if cc.has_function(b)
+    pre_args += '-DHAVE___BUILTIN_@0@'.format(b.to_upper())
+  endif
+endforeach
+
+null_dep = dependency('', required : false)
+
+vulkan_wsi_args = []
+vulkan_wsi_deps = []
+
+with_platform_x11 = true
+with_platform_wayland = false
+with_xlib_lease = true
+
+dep_x11 = dependency('x11')
+dep_xext = dependency('xext')
+dep_xcb = dependency('xcb')
+dep_x11_xcb = dependency('x11-xcb')
+dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8')
+dep_libdrm = dependency(
+  'libdrm', version : '>=' + '2.4.81',
+  required : true
+)
+
+pre_args += '-DHAVE_DRI3'
+dep_xcb_dri3 = dependency('xcb-dri3')
+dep_xcb_present = dependency('xcb-present')
+# until xcb-dri3 has been around long enough to make a hard-dependency:
+if (dep_xcb_dri3.version().version_compare('>= 1.13') and
+    dep_xcb_present.version().version_compare('>= 1.13'))
+  pre_args += '-DHAVE_DRI3_MODIFIERS'
+endif
+dep_xcb_sync = dependency('xcb-sync')
+dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
+
+if with_platform_x11
+  vulkan_wsi_args += ['-DVK_USE_PLATFORM_XCB_KHR', '-DVK_USE_PLATFORM_XLIB_KHR']
+  vulkan_wsi_deps += [
+    dep_xcb,
+    dep_x11_xcb,
+    dep_xcb_dri2,
+    dep_xcb_dri3,
+    dep_xcb_present,
+    dep_xcb_sync,
+    dep_xshmfence,
+  ]
+endif
+if with_platform_wayland
+  dep_wayland_client = dependency('wayland-client', version : '>=1.11')
+  vulkan_wsi_args += ['-DVK_USE_PLATFORM_WAYLAND_KHR']
+  vulkan_wsi_deps += dep_wayland_client
+endif
+
+vulkan_wsi_args += '-DVK_USE_PLATFORM_DISPLAY_KHR'
+vulkan_wsi_deps += [dep_libdrm]
+
+if with_xlib_lease
+  dep_xcb_xrandr = dependency('xcb-randr')
+  dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3')
+  vulkan_wsi_args += '-DVK_USE_PLATFORM_XLIB_XRANDR_EXT'
+  vulkan_wsi_deps += [dep_xcb_xrandr, dep_xlib_xrandr]
+endif
+
+inc_common = [
+  include_directories('include'),
+]
+
+# Check for generic C arguments
+c_args = []
+foreach a : ['-Werror=implicit-function-declaration',
+             '-Werror=missing-prototypes', '-Werror=return-type',
+             '-Werror=incompatible-pointer-types',
+             '-fno-math-errno',
+             '-fno-trapping-math', '-Qunused-arguments']
+  if cc.has_argument(a)
+    c_args += a
+  endif
+endforeach
+
+foreach a : ['missing-field-initializers', 'format-truncation']
+  if cc.has_argument('-W' + a)
+    c_args += '-Wno-' + a
+  endif
+endforeach
+
+c_vis_args = []
+if cc.has_argument('-fvisibility=hidden')
+  c_vis_args += '-fvisibility=hidden'
+endif
+
+# Check for generic C++ arguments
+cpp_args = []
+foreach a : ['-Werror=return-type',
+             '-fno-math-errno', '-fno-trapping-math',
+             '-Qunused-arguments']
+  if cpp.has_argument(a)
+    cpp_args += a
+  endif
+endforeach
+
+# For some reason, the test for -Wno-foo always succeeds with gcc, even if the
+# option is not supported. Hence, check for -Wfoo instead.
+
+foreach a : ['non-virtual-dtor', 'missing-field-initializers', 'format-truncation']
+  if cpp.has_argument('-W' + a)
+    cpp_args += '-Wno-' + a
+  endif
+endforeach
+
+no_override_init_args = []
+foreach a : ['override-init', 'initializer-overrides']
+  if cc.has_argument('-W' + a)
+    no_override_init_args += '-Wno-' + a
+  endif
+endforeach
+
+cpp_vis_args = []
+if cpp.has_argument('-fvisibility=hidden')
+  cpp_vis_args += '-fvisibility=hidden'
+endif
+
+foreach a : pre_args
+  add_project_arguments(a, language : ['c', 'cpp'])
+endforeach
+foreach a : c_args
+  add_project_arguments(a, language : ['c'])
+endforeach
+foreach a : cpp_args
+  add_project_arguments(a, language : ['cpp'])
+endforeach
+
+# check for dl support
+if cc.has_function('dlopen')
+  dep_dl = null_dep
+else
+  dep_dl = cc.find_library('dl')
+endif
+
+dep_pthread = cc.find_library('pthread')
+
+git_sha1_gen_py = files('bin/git_sha1_gen.py')
+sha1_h = custom_target(
+  'git_sha1.h',
+  output : 'git_sha1.h',
+  command : [prog_python, git_sha1_gen_py, '--output', '@OUTPUT@'],
+  build_always : true, # commit sha1 can change without having touched these files
+)
+
+vk_layer_table_helpers = []
+loader_genvk_py = files('modules/Vulkan-Loader/scripts/loader_genvk.py')
+foreach s : ['vk_dispatch_table_helper.h', 'vk_layer_dispatch_table.h']#, 'vk_loader_extensions.h', 'vk_loader_extensions.c']
+  vk_layer_table_helpers += custom_target(
+    s, output : s,
+    command : [prog_python, loader_genvk_py,
+               '-scripts', '../../Vulkan-Docs/scripts', # relative to loader_genvk.py
+               '-registry', join_paths(meson.source_root(), 'modules/Vulkan-Docs/xml/vk.xml'),
+               '-o','@OUTDIR@', s])
+endforeach
+
+vk_api_xml = files('modules/Vulkan-Docs/xml/vk.xml')
+vk_enum_to_str = custom_target(
+  'vk_enum_to_str',
+  input : ['bin/gen_enum_to_str.py', vk_api_xml],
+  output : ['vk_enum_to_str.c', 'vk_enum_to_str.h'],
+  command : [
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@',
+    '--outdir', meson.current_build_dir()
+  ],
+)
+
+util_files = files(
+  'src/mesa/util/hash_table.c',
+  'src/mesa/util/os_socket.c',
+  'src/mesa/util/os_time.c',
+  'src/mesa/util/ralloc.c',
+  'src/mesa/main/hash.c',
+)
+
+subdir('modules/ImGui')
+subdir('src')
\ No newline at end of file
diff --git a/src/cpu_gpu.h b/src/cpu_gpu.h
new file mode 100644
index 00000000..26e4c72f
--- /dev/null
+++ b/src/cpu_gpu.h
@@ -0,0 +1,265 @@
+// NOTE: this include set is inferred from usage in this file
+#include <stdio.h>
+#include <math.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <thread>
+#include <chrono>
+#include <regex>
+using namespace std;
+
+int gpuLoad, gpuTemp, cpuTemp;
+string gpuLoadDisplay, cpuTempLocation;
+FILE *amdGpuFile, *amdTempFile, *cpuTempFile;
+
+const int NUM_CPU_STATES = 10;
+
+struct Cpus{
+  size_t num;
+  string name;
+  int value;
+  string output;
+  int freq;
+};
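+// Layout note (a sketch of the data model used below, as built by
+// coreCounting()): cpuArray[0] holds the aggregate "CPU:" entry and the
+// following entries hold one "CPU n:" slot per logical core, which is why
+// arraySize is numCpuCores + 1.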
+
+size_t numCpuCores = std::thread::hardware_concurrency();
+size_t arraySize = numCpuCores + 1;
+std::vector<Cpus> cpuArray;
+pthread_t cpuThread, gpuThread, cpuInfoThread, nvidiaSmiThread;
+
+string exec(string command) {
+  char buffer[128];
+  string result = "";
+
+  // Open pipe to file
+  FILE* pipe = popen(command.c_str(), "r");
+  if (!pipe) {
+    return "popen failed!";
+  }
+
+  // read the process output until EOF; looping on fgets avoids the classic
+  // while(!feof(...)) pitfall of acting on a failed read
+  while (fgets(buffer, sizeof(buffer), pipe) != NULL)
+    result += buffer;
+
+  pclose(pipe);
+  return result;
+}
+
+void coreCounting(){
+  cpuArray.push_back({0, "CPU:"});
+  for (size_t i = 0; i < arraySize; i++) {
+    size_t offset = i;
+    stringstream ss;
+    ss << "CPU " << offset << ":";
+    string cpuNameString = ss.str();
+    cpuArray.push_back({i+1 , cpuNameString});
+  }
+}
+
+std::string m_cpuUtilizationString;
+
+enum CPUStates
+{
+  S_USER = 0,
+  S_NICE,
+  S_SYSTEM,
+  S_IDLE,
+  S_IOWAIT,
+  S_IRQ,
+  S_SOFTIRQ,
+  S_STEAL,
+  S_GUEST,
+  S_GUEST_NICE
+};
+
+typedef struct CPUData
+{
+  std::string cpu;
+  size_t times[NUM_CPU_STATES];
+} CPUData;
+
+void ReadStatsCPU(std::vector<CPUData> & entries)
+{
+  std::ifstream fileStat("/proc/stat");
+
+  std::string line;
+
+  const std::string STR_CPU("cpu");
+  const std::size_t LEN_STR_CPU = STR_CPU.size();
+  const std::string STR_TOT("tot");
+
+  while(std::getline(fileStat, line))
+  {
+    // cpu stats line found
+    if(!line.compare(0, LEN_STR_CPU, STR_CPU))
+    {
+      std::istringstream ss(line);
+
+      // store entry
+      entries.emplace_back(CPUData());
+      CPUData & entry = entries.back();
+
+      // read cpu label
+      ss >> entry.cpu;
+
+      if(entry.cpu.size() > LEN_STR_CPU)
+        entry.cpu.erase(0, LEN_STR_CPU);
+      else
+        entry.cpu = STR_TOT;
+
+      // read times
+      for(int i = 0; i < NUM_CPU_STATES; ++i)
+        ss >> entry.times[i];
+    }
+  }
+}
+
+size_t GetIdleTime(const CPUData & e)
+{
+  return e.times[S_IDLE] +
+         e.times[S_IOWAIT];
+}
+
+size_t GetActiveTime(const CPUData & e)
+{
+  return e.times[S_USER] +
+         e.times[S_NICE] +
+         e.times[S_SYSTEM] +
+         e.times[S_IRQ] +
+         e.times[S_SOFTIRQ] +
+         e.times[S_STEAL] +
+         e.times[S_GUEST] +
+         e.times[S_GUEST_NICE];
+}
+
+void PrintStats(const std::vector<CPUData> & entries1, const std::vector<CPUData> & entries2)
+{
+  const size_t NUM_ENTRIES = entries1.size();
+
+  for(size_t i = 0; i < NUM_ENTRIES; ++i)
+  {
+    const CPUData & e1 = entries1[i];
+    const CPUData & e2 = entries2[i];
+
+    const float ACTIVE_TIME = static_cast<float>(GetActiveTime(e2) - GetActiveTime(e1));
+    const float IDLE_TIME = static_cast<float>(GetIdleTime(e2) - GetIdleTime(e1));
+    const float TOTAL_TIME = ACTIVE_TIME + IDLE_TIME;
+
+    // percentage of time spent active between the two snapshots
+    cpuArray[i].value = truncf(100.f * ACTIVE_TIME / TOTAL_TIME);
+  }
+}
+
+void *cpuInfo(void *){
+  FILE *cpuInfo = fopen("/proc/cpuinfo", "r");
+  char line[256];
+  int i = 0;
+  while (fgets(line, sizeof(line), cpuInfo)) {
+    std::string row;
+    row = line;
+    if (row.find("MHz") != std::string::npos){
+      row = std::regex_replace(row, std::regex(R"([^0-9.])"), "");
+      cpuArray[i + 1].freq = stoi(row);
+      i++;
+    }
+  }
+
+  fclose(cpuInfo);
+
+  char buff[6];
+  rewind(cpuTempFile);
+  fflush(cpuTempFile);
+  // %5s caps the read at five characters plus NUL, matching buff's size
+  fscanf(cpuTempFile, "%5s", buff);
+  cpuTemp = stoi(buff) / 1000;
+  pthread_detach(cpuInfoThread);
+
+  return NULL;
+}
+
+void *queryNvidiaSmi(void *){
+  vector<string> smiArray;
+  string nvidiaSmi = exec("nvidia-smi --query-gpu=utilization.gpu,temperature.gpu --format=csv,noheader | tr -d ' ' | head -n1 | tr -d '%'");
+  istringstream f(nvidiaSmi);
+  string s;
+  while (getline(f, s, ',')) {
+    smiArray.push_back(s);
+  }
+  gpuLoadDisplay = smiArray[0];
+  gpuLoad = stoi(smiArray[0]);
+  gpuTemp = stoi(smiArray[1]);
+
+  pthread_detach(nvidiaSmiThread);
+  return NULL;
+}
+
+void *getAmdGpuUsage(void *){
+  // buff must hold up to five digits (temperatures are reported in
+  // millidegrees, e.g. "45000") plus the terminating NUL
+  char buff[6];
+  rewind(amdGpuFile);
+  fflush(amdGpuFile);
+  fscanf(amdGpuFile, "%5s", buff);
+  gpuLoadDisplay = buff;
+  gpuLoad = stoi(buff);
+
+  rewind(amdTempFile);
+  fflush(amdTempFile);
+  fscanf(amdTempFile, "%5s", buff);
+  gpuTemp = (stoi(buff) / 1000);
+
+  pthread_detach(gpuThread);
+  return NULL;
+}
+
+void *getCpuUsage(void *)
+{
+  std::vector<CPUData> entries1;
+  std::vector<CPUData> entries2;
+
+  // snapshot 1
+  ReadStatsCPU(entries1);
+
+  // 100ms pause
+  std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+  // snapshot 2
+  ReadStatsCPU(entries2);
+
+  // print output
+  PrintStats(entries1, entries2);
+  pthread_detach(cpuThread);
+  return NULL;
+}
+
+
+void updateCpuStrings(){
+  for (size_t i = 0; i < arraySize; i++) {
+    size_t spacing = 10;
+    string value = to_string(cpuArray[i].value);
+    value.erase( value.find_last_not_of('0') + 1, std::string::npos );
+    size_t correctionValue = (spacing - cpuArray[i].name.length()) - value.length();
+    string correction = "";
+    for (size_t j = 0; j < correctionValue; j++) {
+      correction.append(" ");
+    }
+    stringstream ss;
+    // entry 0 is the aggregate "CPU:" line; per-core entries get padded
+    if (i == 0) {
+      ss << cpuArray[i].name << " " << cpuArray[i].value << "%";
+    } else {
+      ss << cpuArray[i].name << correction << cpuArray[i].value << "%";
+    }
+    cpuArray[i].output = ss.str();
+  }
+}
\ No newline at end of file
diff --git a/src/keybinds.h b/src/keybinds.h
new file mode 100644
index 00000000..2fa403fc
--- /dev/null
+++ b/src/keybinds.h
@@ -0,0 +1,18 @@
+// the two system headers below are inferred from the types used here
+#include <pthread.h>
+#include <X11/Xlib.h>
+#include "X11/keysym.h"
+#include "mesa/util/os_time.h"
+
+double elapsedF2, elapsedF12;
+uint64_t last_f2_press, last_f12_press;
+pthread_t f2;
+char *displayid = getenv("DISPLAY");
+Display *dpy = XOpenDisplay(displayid);
+
+bool key_is_pressed(KeySym ks) {
+  char keys_return[32];
+  XQueryKeymap(dpy, keys_return);
+  KeyCode kc2 = XKeysymToKeycode(dpy, ks);
+  // keys_return is a 256-bit bitmap, one bit per keycode: byte kc2 >> 3,
+  // bit kc2 & 7 is set while that key is held down
+  bool isPressed = !!(keys_return[kc2 >> 3] & (1 << (kc2 & 7)));
+  return isPressed;
+}
diff --git a/src/logging.h b/src/logging.h
new file mode 100644
index 00000000..f243df4a
--- /dev/null
+++ b/src/logging.h
@@ -0,0 +1,64 @@
+// NOTE: this include set is inferred from usage in this file
+#include <string>
+#include <vector>
+#include <fstream>
+#include <ctime>
+#include <cstdlib>
+#include <thread>
+#include <chrono>
+
+#include "mesa/util/os_time.h"
+
+using namespace std;
+
+string os, cpu, gpu, ram, kernel, driver, deviceName;
+bool sysInfoFetched;
+int gpuLoadLog, cpuLoadLog, log_period;
+
+struct logData{
+  double fps;
+  double cpu;
+  double gpu;
+  double previous;
+};
+
+double fps, elapsedLog;
+std::vector<logData> logArray;
+ofstream out;
+const char* duration_env = std::getenv("LOG_DURATION");
+const char* mangohud_output_env = std::getenv("MANGOHUD_OUTPUT");
+const char* log_period_env = std::getenv("LOG_PERIOD");
+int duration, num;
+bool loggingOn;
+uint64_t log_start;
+
+void writeFile(string date){
+  out.open(mangohud_output_env + date, ios::out | ios::app);
+  out << "os," << "cpu," << "gpu," << "ram," << "kernel," << "driver" << endl;
+  out << os << "," << cpu << "," << gpu << "," << ram << "," << kernel << "," << driver << endl;
+  for (size_t i = 0; i < logArray.size(); i++) {
+    out << logArray[i].fps << "," << logArray[i].cpu << "," << logArray[i].gpu << endl;
+  }
+  out.close();
+  logArray.clear();
+}
+
+void *logging(void *){
+  time_t now_log = time(0);
+  tm *log_time = localtime(&now_log);
+  // only tm_mon is zero-based and needs the +1; tm_hour/tm_min/tm_sec
+  // already hold the actual clock values
+  string date = to_string(log_time->tm_year + 1900) + "-" + to_string(1 + log_time->tm_mon) + "-" + to_string(log_time->tm_mday) + "_" + to_string(log_time->tm_hour) + "-" + to_string(log_time->tm_min) + "-" + to_string(log_time->tm_sec);
+  log_start = os_time_get();
+  out.open(mangohud_output_env + date, ios::out | ios::app);
+
+  while (loggingOn){
+    uint64_t now = os_time_get();
+    elapsedLog = (double)(now - log_start);
+    out << fps << "," << cpuLoadLog << "," << gpuLoadLog << "," << now - log_start << endl;
+    // logArray.push_back({fps, cpuLoadLog, gpuLoadLog, 0.0f});
+
+    if ((elapsedLog) >= duration * 1000000 && duration_env)
+      loggingOn = false;
+
+    this_thread::sleep_for(chrono::milliseconds(log_period));
+  }
+  // writeFile(date);
+  out.close();
+  return NULL;
+}
\ No newline at end of file
diff --git a/src/mesa-overlay-control.py b/src/mesa-overlay-control.py
new file mode 100755
index 00000000..6947250c
--- /dev/null
+++ b/src/mesa-overlay-control.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+import os
+import socket
+import sys
+import select
+from select import EPOLLIN, EPOLLPRI, EPOLLERR
+import time
+from collections import namedtuple
+import argparse
+
+TIMEOUT = 1.0 # seconds
+
+VERSION_HEADER = bytearray('MesaOverlayControlVersion', 'utf-8')
+DEVICE_NAME_HEADER = bytearray('DeviceName', 'utf-8')
+MESA_VERSION_HEADER = bytearray('MesaVersion', 'utf-8')
+
+DEFAULT_SERVER_ADDRESS = "\0mesa_overlay"
+
+class Connection:
+    def __init__(self, path):
+        # Create a Unix Domain socket and connect
+        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            sock.connect(path)
+        except socket.error as msg:
+            print(msg)
+            sys.exit(1)
+
+        self.sock = sock
+
+        # initialize poll interface and register socket
+        epoll = select.epoll()
+        epoll.register(sock, EPOLLIN | EPOLLPRI | EPOLLERR)
+        self.epoll = epoll
+
+    def recv(self, timeout):
+        '''
+        timeout as float in seconds
+        returns:
+            - None on error or disconnection
+            - bytes() (empty) on timeout
+        '''
+
+        events = self.epoll.poll(timeout)
+        for ev in events:
+            (fd, event) = ev
+            if fd != self.sock.fileno():
+                continue
+
+            # check for socket error
+            if event & EPOLLERR:
+                return None
+
+            # EPOLLIN or EPOLLPRI, just read the message
+            msg = self.sock.recv(4096)
+
+            # socket disconnected
+            if len(msg) == 0:
+                return None
+
+            return msg
+
+        return bytes()
+
+    def send(self, msg):
+        self.sock.send(msg)
+
+class MsgParser:
+    MSGBEGIN = bytes(':', 'utf-8')[0]
+    MSGEND = bytes(';', 'utf-8')[0]
+    MSGSEP = bytes('=', 'utf-8')[0]
+
+    def __init__(self, conn):
+        self.cmdpos = 0
+        self.parampos = 0
+        self.bufferpos = 0
+        self.reading_cmd = False
+        self.reading_param = False
+        self.buffer = None
+        self.cmd = bytearray(4096)
+        self.param = bytearray(4096)
+
+        self.conn = conn
+
+    def readCmd(self, ncmds, timeout=TIMEOUT):
+        '''
+        returns:
+            - None on error or disconnection
+            - bytes() (empty) on timeout
+        '''
+
+        parsed = []
+
+        remaining = timeout
+
+        while remaining > 0 and ncmds > 0:
+            now = time.monotonic()
+
+            if self.buffer == None:
+                self.buffer = self.conn.recv(remaining)
+                self.bufferpos = 0
+
+            # disconnected or error
+            if self.buffer == None:
+                return None
+
+            for i in range(self.bufferpos, len(self.buffer)):
+                c = self.buffer[i]
+                self.bufferpos += 1
+                if c == self.MSGBEGIN:
+                    self.cmdpos = 0
+                    self.parampos = 0
+                    self.reading_cmd = True
+                    self.reading_param = False
+                elif c == self.MSGEND:
+                    if not self.reading_cmd:
+                        continue
+                    self.reading_cmd = False
+                    self.reading_param = False
+
+                    cmd = self.cmd[0:self.cmdpos]
+                    param = self.param[0:self.parampos]
+                    self.reading_cmd = False
self.reading_param = False + + parsed.append((cmd, param)) + ncmds -= 1 + if ncmds == 0: + break + elif c == self.MSGSEP: + if self.reading_cmd: + self.reading_param = True + else: + if self.reading_param: + self.param[self.parampos] = c + self.parampos += 1 + elif self.reading_cmd: + self.cmd[self.cmdpos] = c + self.cmdpos += 1 + + # if we read the entire buffer and didn't finish the command, + # throw it away + self.buffer = None + + # check if we have time for another iteration + elapsed = time.monotonic() - now + remaining = max(0, remaining - elapsed) + + # timeout + return parsed + +def control(args): + if args.socket: + address = '\0' + args.socket + else: + address = DEFAULT_SERVER_ADDRESS + + conn = Connection(address) + msgparser = MsgParser(conn) + + version = None + name = None + mesa_version = None + + msgs = msgparser.readCmd(3) + + for m in msgs: + cmd, param = m + if cmd == VERSION_HEADER: + version = int(param) + elif cmd == DEVICE_NAME_HEADER: + name = param.decode('utf-8') + elif cmd == MESA_VERSION_HEADER: + mesa_version = param.decode('utf-8') + + if version != 1 or name == None or mesa_version == None: + print('ERROR: invalid protocol') + sys.exit(1) + + + if args.info: + info = "Protocol Version: {}\n" + info += "Device Name: {}\n" + info += "Mesa Version: {}" + print(info.format(version, name, mesa_version)) + + if args.cmd == 'start-capture': + conn.send(bytearray(':capture=1;', 'utf-8')) + elif args.cmd == 'stop-capture': + conn.send(bytearray(':capture=0;', 'utf-8')) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='MESA_overlay control client') + parser.add_argument('--info', action='store_true', help='Print info from socket') + parser.add_argument('--socket', '-s', type=str, help='Path to socket') + + commands = parser.add_subparsers(help='commands to run', dest='cmd') + commands.add_parser('start-capture') + commands.add_parser('stop-capture') + + args = parser.parse_args() + + control(args) diff --git a/src/mesa/c11/threads.h b/src/mesa/c11/threads.h new file mode 100644 index 00000000..17ded84b --- /dev/null +++ b/src/mesa/c11/threads.h @@ -0,0 +1,73 @@ +/* + * C11 emulation library + * + * (C) Copyright yohhoy 2012. + * Distributed under the Boost Software License, Version 1.0. + * + * Permission is hereby granted, free of charge, to any person or organization + * obtaining a copy of the software and accompanying documentation covered by + * this license (the "Software") to use, reproduce, display, distribute, + * execute, and transmit the Software, and to prepare [[derivative work]]s of the + * Software, and to permit third-parties to whom the Software is furnished to + * do so, all subject to the following: + * + * The copyright notices in the Software and this entire statement, including + * the above license grant, this restriction and the following disclaimer, + * must be included in all copies of the Software, in whole or in part, and + * all derivative works of the Software, unless such copies or derivative + * works are solely in the form of machine-executable object code generated by + * a source language processor. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+ * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef EMULATED_THREADS_H_INCLUDED_
+#define EMULATED_THREADS_H_INCLUDED_
+
+#include <time.h>
+
+#ifndef TIME_UTC
+#define TIME_UTC 1
+#endif
+
+#include "../c99_compat.h" /* for `inline` */
+
+/*---------------------------- types ----------------------------*/
+typedef void (*tss_dtor_t)(void*);
+typedef int (*thrd_start_t)(void*);
+
+
+/*-------------------- enumeration constants --------------------*/
+enum {
+    mtx_plain     = 0,
+    mtx_try       = 1,
+    mtx_timed     = 2,
+    mtx_recursive = 4
+};
+
+enum {
+    thrd_success = 0, // succeeded
+    thrd_timeout,     // timeout
+    thrd_error,       // failed
+    thrd_busy,        // resource busy
+    thrd_nomem        // out of memory
+};
+
+/*-------------------------- functions --------------------------*/
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#include "threads_win32.h"
+#elif defined(HAVE_PTHREAD)
+#include "threads_posix.h"
+#else
+#error Not supported on this platform.
+#endif
+
+
+
+#endif /* EMULATED_THREADS_H_INCLUDED_ */
diff --git a/src/mesa/c11/threads_posix.h b/src/mesa/c11/threads_posix.h
new file mode 100644
index 00000000..45cb6075
--- /dev/null
+++ b/src/mesa/c11/threads_posix.h
@@ -0,0 +1,396 @@
+/*
+ * C11 emulation library
+ *
+ * (C) Copyright yohhoy 2012.
+ * Distributed under the Boost Software License, Version 1.0.
+ *
+ * Permission is hereby granted, free of charge, to any person or organization
+ * obtaining a copy of the software and accompanying documentation covered by
+ * this license (the "Software") to use, reproduce, display, distribute,
+ * execute, and transmit the Software, and to prepare [[derivative work]]s of the
+ * Software, and to permit third-parties to whom the Software is furnished to
+ * do so, all subject to the following:
+ *
+ * The copyright notices in the Software and this entire statement, including
+ * the above license grant, this restriction and the following disclaimer,
+ * must be included in all copies of the Software, in whole or in part, and
+ * all derivative works of the Software, unless such copies or derivative
+ * works are solely in the form of machine-executable object code generated by
+ * a source language processor.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+ * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <stdlib.h>
+#ifndef assert
+#include <assert.h>
+#endif
+#include <limits.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sched.h>
+#include <stdint.h> /* for intptr_t */
+
+/*
+Configuration macro:
+
+  EMULATED_THREADS_USE_NATIVE_TIMEDLOCK
+    Use pthread_mutex_timedlock() for `mtx_timedlock()'
+    Otherwise use mtx_trylock() + *busy loop* emulation.
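+    (i.e. on platforms where pthread_mutex_timedlock() is unavailable,
+    mtx_timedlock() below falls back to polling mtx_trylock() and
+    thrd_yield() until the deadline passes)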
+*/
+#if !defined(__CYGWIN__) && !defined(__APPLE__) && !defined(__NetBSD__)
+#define EMULATED_THREADS_USE_NATIVE_TIMEDLOCK
+#endif
+
+
+#include <pthread.h>
+
+/*---------------------------- macros ----------------------------*/
+#define ONCE_FLAG_INIT PTHREAD_ONCE_INIT
+#ifdef INIT_ONCE_STATIC_INIT
+#define TSS_DTOR_ITERATIONS PTHREAD_DESTRUCTOR_ITERATIONS
+#else
+#define TSS_DTOR_ITERATIONS 1  // assume TSS dtor MAY be called at least once.
+#endif
+
+// FIXME: temporary non-standard hack to ease transition
+#define _MTX_INITIALIZER_NP PTHREAD_MUTEX_INITIALIZER
+
+/*---------------------------- types ----------------------------*/
+typedef pthread_cond_t  cnd_t;
+typedef pthread_t       thrd_t;
+typedef pthread_key_t   tss_t;
+typedef pthread_mutex_t mtx_t;
+typedef pthread_once_t  once_flag;
+
+
+/*
+Implementation limits:
+  - Conditionally emulation for "mutex with timeout"
+    (see EMULATED_THREADS_USE_NATIVE_TIMEDLOCK macro)
+*/
+struct impl_thrd_param {
+    thrd_start_t func;
+    void *arg;
+};
+
+static inline void *
+impl_thrd_routine(void *p)
+{
+    struct impl_thrd_param pack = *((struct impl_thrd_param *)p);
+    free(p);
+    return (void*)(intptr_t)pack.func(pack.arg);
+}
+
+
+/*--------------- 7.25.2 Initialization functions ---------------*/
+// 7.25.2.1
+static inline void
+call_once(once_flag *flag, void (*func)(void))
+{
+    pthread_once(flag, func);
+}
+
+
+/*------------- 7.25.3 Condition variable functions -------------*/
+// 7.25.3.1
+static inline int
+cnd_broadcast(cnd_t *cond)
+{
+    assert(cond != NULL);
+    return (pthread_cond_broadcast(cond) == 0) ? thrd_success : thrd_error;
+}
+
+// 7.25.3.2
+static inline void
+cnd_destroy(cnd_t *cond)
+{
+    assert(cond);
+    pthread_cond_destroy(cond);
+}
+
+// 7.25.3.3
+static inline int
+cnd_init(cnd_t *cond)
+{
+    assert(cond != NULL);
+    return (pthread_cond_init(cond, NULL) == 0) ? thrd_success : thrd_error;
+}
+
+// 7.25.3.4
+static inline int
+cnd_signal(cnd_t *cond)
+{
+    assert(cond != NULL);
+    return (pthread_cond_signal(cond) == 0) ? thrd_success : thrd_error;
+}
+
+// 7.25.3.5
+static inline int
+cnd_timedwait(cnd_t *cond, mtx_t *mtx, const struct timespec *abs_time)
+{
+    int rt;
+
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    assert(abs_time != NULL);
+
+    rt = pthread_cond_timedwait(cond, mtx, abs_time);
+    if (rt == ETIMEDOUT)
+        return thrd_busy;
+    return (rt == 0) ? thrd_success : thrd_error;
+}
+
+// 7.25.3.6
+static inline int
+cnd_wait(cnd_t *cond, mtx_t *mtx)
+{
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    return (pthread_cond_wait(cond, mtx) == 0) ? thrd_success : thrd_error;
+}
+
+
+/*-------------------- 7.25.4 Mutex functions --------------------*/
+// 7.25.4.1
+static inline void
+mtx_destroy(mtx_t *mtx)
+{
+    assert(mtx != NULL);
+    pthread_mutex_destroy(mtx);
+}
+
+/*
+ * XXX: Workaround when building with -O0 and without pthreads link.
+ *
+ * In such cases constant folding and dead code elimination won't be
+ * available, thus the compiler will always add the pthread_mutexattr*
+ * functions into the binary. As we try to link, we'll fail as the
+ * symbols are unresolved.
+ *
+ * Ideally we'll enable the optimisations locally, yet that does not
+ * seem to work.
+ *
+ * So the alternative workaround is to annotate the symbols as weak.
+ * Thus the linker will be happy and things don't clash when building
+ * with -O1 or greater.
+ */ +#if defined(HAVE_FUNC_ATTRIBUTE_WEAK) && !defined(__CYGWIN__) +__attribute__((weak)) +int pthread_mutexattr_init(pthread_mutexattr_t *attr); + +__attribute__((weak)) +int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type); + +__attribute__((weak)) +int pthread_mutexattr_destroy(pthread_mutexattr_t *attr); +#endif + +// 7.25.4.2 +static inline int +mtx_init(mtx_t *mtx, int type) +{ + pthread_mutexattr_t attr; + assert(mtx != NULL); + if (type != mtx_plain && type != mtx_timed && type != mtx_try + && type != (mtx_plain|mtx_recursive) + && type != (mtx_timed|mtx_recursive) + && type != (mtx_try|mtx_recursive)) + return thrd_error; + + if ((type & mtx_recursive) == 0) { + pthread_mutex_init(mtx, NULL); + return thrd_success; + } + + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(mtx, &attr); + pthread_mutexattr_destroy(&attr); + return thrd_success; +} + +// 7.25.4.3 +static inline int +mtx_lock(mtx_t *mtx) +{ + assert(mtx != NULL); + return (pthread_mutex_lock(mtx) == 0) ? thrd_success : thrd_error; +} + +static inline int +mtx_trylock(mtx_t *mtx); + +static inline void +thrd_yield(void); + +// 7.25.4.4 +static inline int +mtx_timedlock(mtx_t *mtx, const struct timespec *ts) +{ + assert(mtx != NULL); + assert(ts != NULL); + + { +#ifdef EMULATED_THREADS_USE_NATIVE_TIMEDLOCK + int rt; + rt = pthread_mutex_timedlock(mtx, ts); + if (rt == 0) + return thrd_success; + return (rt == ETIMEDOUT) ? thrd_busy : thrd_error; +#else + time_t expire = time(NULL); + expire += ts->tv_sec; + while (mtx_trylock(mtx) != thrd_success) { + time_t now = time(NULL); + if (expire < now) + return thrd_busy; + // busy loop! + thrd_yield(); + } + return thrd_success; +#endif + } +} + +// 7.25.4.5 +static inline int +mtx_trylock(mtx_t *mtx) +{ + assert(mtx != NULL); + return (pthread_mutex_trylock(mtx) == 0) ? thrd_success : thrd_busy; +} + +// 7.25.4.6 +static inline int +mtx_unlock(mtx_t *mtx) +{ + assert(mtx != NULL); + return (pthread_mutex_unlock(mtx) == 0) ? thrd_success : thrd_error; +} + + +/*------------------- 7.25.5 Thread functions -------------------*/ +// 7.25.5.1 +static inline int +thrd_create(thrd_t *thr, thrd_start_t func, void *arg) +{ + struct impl_thrd_param *pack; + assert(thr != NULL); + pack = (struct impl_thrd_param *)malloc(sizeof(struct impl_thrd_param)); + if (!pack) return thrd_nomem; + pack->func = func; + pack->arg = arg; + if (pthread_create(thr, NULL, impl_thrd_routine, pack) != 0) { + free(pack); + return thrd_error; + } + return thrd_success; +} + +// 7.25.5.2 +static inline thrd_t +thrd_current(void) +{ + return pthread_self(); +} + +// 7.25.5.3 +static inline int +thrd_detach(thrd_t thr) +{ + return (pthread_detach(thr) == 0) ? 
        thrd_success : thrd_error;
+}
+
+// 7.25.5.4
+static inline int
+thrd_equal(thrd_t thr0, thrd_t thr1)
+{
+    return pthread_equal(thr0, thr1);
+}
+
+// 7.25.5.5
+static inline void
+thrd_exit(int res)
+{
+    pthread_exit((void*)(intptr_t)res);
+}
+
+// 7.25.5.6
+static inline int
+thrd_join(thrd_t thr, int *res)
+{
+    void *code;
+    if (pthread_join(thr, &code) != 0)
+        return thrd_error;
+    if (res)
+        *res = (int)(intptr_t)code;
+    return thrd_success;
+}
+
+// 7.25.5.7
+static inline void
+thrd_sleep(const struct timespec *time_point, struct timespec *remaining)
+{
+    assert(time_point != NULL);
+    nanosleep(time_point, remaining);
+}
+
+// 7.25.5.8
+static inline void
+thrd_yield(void)
+{
+    sched_yield();
+}
+
+
+/*----------- 7.25.6 Thread-specific storage functions -----------*/
+// 7.25.6.1
+static inline int
+tss_create(tss_t *key, tss_dtor_t dtor)
+{
+    assert(key != NULL);
+    return (pthread_key_create(key, dtor) == 0) ? thrd_success : thrd_error;
+}
+
+// 7.25.6.2
+static inline void
+tss_delete(tss_t key)
+{
+    pthread_key_delete(key);
+}
+
+// 7.25.6.3
+static inline void *
+tss_get(tss_t key)
+{
+    return pthread_getspecific(key);
+}
+
+// 7.25.6.4
+static inline int
+tss_set(tss_t key, void *val)
+{
+    return (pthread_setspecific(key, val) == 0) ? thrd_success : thrd_error;
+}
+
+
+/*-------------------- 7.25.7 Time functions --------------------*/
+// 7.25.6.1
+#ifndef HAVE_TIMESPEC_GET
+static inline int
+timespec_get(struct timespec *ts, int base)
+{
+    if (!ts) return 0;
+    if (base == TIME_UTC) {
+        clock_gettime(CLOCK_REALTIME, ts);
+        return base;
+    }
+    return 0;
+}
+#endif
diff --git a/src/mesa/c11/threads_win32.h b/src/mesa/c11/threads_win32.h
new file mode 100644
index 00000000..326cfc46
--- /dev/null
+++ b/src/mesa/c11/threads_win32.h
@@ -0,0 +1,653 @@
+/*
+ * C11 emulation library
+ *
+ * (C) Copyright yohhoy 2012.
+ * Distributed under the Boost Software License, Version 1.0.
+ *
+ * Permission is hereby granted, free of charge, to any person or organization
+ * obtaining a copy of the software and accompanying documentation covered by
+ * this license (the "Software") to use, reproduce, display, distribute,
+ * execute, and transmit the Software, and to prepare [[derivative work]]s of the
+ * Software, and to permit third-parties to whom the Software is furnished to
+ * do so, all subject to the following:
+ *
+ * The copyright notices in the Software and this entire statement, including
+ * the above license grant, this restriction and the following disclaimer,
+ * must be included in all copies of the Software, in whole or in part, and
+ * all derivative works of the Software, unless such copies or derivative
+ * works are solely in the form of machine-executable object code generated by
+ * a source language processor.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+ * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef assert
+#include <assert.h>
+#endif
+#include <limits.h>
+#include <errno.h>
+#include <process.h>  // MSVCRT
+#include <stdlib.h>
+
+/*
+Configuration macro:
+
+  EMULATED_THREADS_USE_NATIVE_CALL_ONCE
+    Use native WindowsAPI one-time initialization function.
+    (requires WinVista or later)
+    Otherwise emulate by mtx_trylock() + *busy loop* for WinXP.
+
+  EMULATED_THREADS_USE_NATIVE_CV
+    Use native WindowsAPI condition variable object.
+    (requires WinVista or later)
+    Otherwise use emulated implementation for WinXP.
+
+  EMULATED_THREADS_TSS_DTOR_SLOTNUM
+    Max registerable TSS dtor number.
+*/
+
+// XXX: Retain XP compatibility
+#if 0
+#if _WIN32_WINNT >= 0x0600
+// Prefer native WindowsAPI on newer environment.
+#if !defined(__MINGW32__)
+#define EMULATED_THREADS_USE_NATIVE_CALL_ONCE
+#endif
+#define EMULATED_THREADS_USE_NATIVE_CV
+#endif
+#endif
+#define EMULATED_THREADS_TSS_DTOR_SLOTNUM 64  // see TLS_MINIMUM_AVAILABLE
+
+
+#include <windows.h>
+
+// check configuration
+#if defined(EMULATED_THREADS_USE_NATIVE_CALL_ONCE) && (_WIN32_WINNT < 0x0600)
+#error EMULATED_THREADS_USE_NATIVE_CALL_ONCE requires _WIN32_WINNT>=0x0600
+#endif
+
+#if defined(EMULATED_THREADS_USE_NATIVE_CV) && (_WIN32_WINNT < 0x0600)
+#error EMULATED_THREADS_USE_NATIVE_CV requires _WIN32_WINNT>=0x0600
+#endif
+
+/* Visual Studio 2015 and later */
+#ifdef _MSC_VER
+#define HAVE_TIMESPEC_GET
+#endif
+
+/*---------------------------- macros ----------------------------*/
+#ifdef EMULATED_THREADS_USE_NATIVE_CALL_ONCE
+#define ONCE_FLAG_INIT INIT_ONCE_STATIC_INIT
+#else
+#define ONCE_FLAG_INIT {0}
+#endif
+#define TSS_DTOR_ITERATIONS 1
+
+// FIXME: temporary non-standard hack to ease transition
+#define _MTX_INITIALIZER_NP {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}
+
+/*---------------------------- types ----------------------------*/
+typedef struct cnd_t {
+#ifdef EMULATED_THREADS_USE_NATIVE_CV
+    CONDITION_VARIABLE condvar;
+#else
+    int blocked;
+    int gone;
+    int to_unblock;
+    HANDLE sem_queue;
+    HANDLE sem_gate;
+    CRITICAL_SECTION monitor;
+#endif
+} cnd_t;
+
+typedef HANDLE thrd_t;
+
+typedef DWORD tss_t;
+
+typedef CRITICAL_SECTION mtx_t;
+
+#ifdef EMULATED_THREADS_USE_NATIVE_CALL_ONCE
+typedef INIT_ONCE once_flag;
+#else
+typedef struct once_flag_t {
+    volatile LONG status;
+} once_flag;
+#endif
+
+
+static inline void * tss_get(tss_t key);
+static inline void thrd_yield(void);
+static inline int mtx_trylock(mtx_t *mtx);
+static inline int mtx_lock(mtx_t *mtx);
+static inline int mtx_unlock(mtx_t *mtx);
+
+/*
+Implementation limits:
+  - Conditional emulation for "Initialization functions"
+    (see EMULATED_THREADS_USE_NATIVE_CALL_ONCE macro)
+  - Emulated `mtx_timedlock()' with mtx_trylock() + *busy loop*
+*/
+static void impl_tss_dtor_invoke(void);  // forward decl.
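
An aside on usage (illustrative only, not part of the patch): both the POSIX and the Win32 backend expose the same C11-style surface, so callers can stay platform-neutral. A minimal sketch, assuming it is compiled next to these headers (the relative include path mirrors how `hash.h` pulls in `../c11/threads.h`; with a real C11 libc, `<threads.h>` would do):

```c
#include <stdio.h>
#include "../c11/threads.h"  /* emulated C11 threads API sketched above */

static mtx_t lock;
static int counter;

/* thrd_start_t: takes a void*, returns an int exit code. */
static int worker(void *arg)
{
    (void)arg;
    for (int i = 0; i < 1000; i++) {
        mtx_lock(&lock);     /* returns thrd_success on success */
        counter++;
        mtx_unlock(&lock);
    }
    return 0;
}

int main(void)
{
    thrd_t a, b;
    if (mtx_init(&lock, mtx_plain) != thrd_success)
        return 1;
    thrd_create(&a, worker, NULL);
    thrd_create(&b, worker, NULL);
    thrd_join(a, NULL);      /* NULL: exit code not needed */
    thrd_join(b, NULL);
    mtx_destroy(&lock);
    printf("counter = %d\n", counter);  /* always 2000 under the mutex */
    return 0;
}
```

On Linux this resolves to threads_posix.h (built with `HAVE_PTHREAD` defined and linked against pthreads); on Windows the same source compiles against threads_win32.h.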
+ +struct impl_thrd_param { + thrd_start_t func; + void *arg; +}; + +static unsigned __stdcall impl_thrd_routine(void *p) +{ + struct impl_thrd_param pack; + int code; + memcpy(&pack, p, sizeof(struct impl_thrd_param)); + free(p); + code = pack.func(pack.arg); + impl_tss_dtor_invoke(); + return (unsigned)code; +} + +static DWORD impl_timespec2msec(const struct timespec *ts) +{ + return (DWORD)((ts->tv_sec * 1000U) + (ts->tv_nsec / 1000000L)); +} + +#ifdef EMULATED_THREADS_USE_NATIVE_CALL_ONCE +struct impl_call_once_param { void (*func)(void); }; +static BOOL CALLBACK impl_call_once_callback(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *Context) +{ + struct impl_call_once_param *param = (struct impl_call_once_param*)Parameter; + (param->func)(); + ((void)InitOnce); ((void)Context); // suppress warning + return TRUE; +} +#endif // ifdef EMULATED_THREADS_USE_NATIVE_CALL_ONCE + +#ifndef EMULATED_THREADS_USE_NATIVE_CV +/* +Note: + The implementation of condition variable is ported from Boost.Interprocess + See http://www.boost.org/boost/interprocess/sync/windows/condition.hpp +*/ +static void impl_cond_do_signal(cnd_t *cond, int broadcast) +{ + int nsignal = 0; + + EnterCriticalSection(&cond->monitor); + if (cond->to_unblock != 0) { + if (cond->blocked == 0) { + LeaveCriticalSection(&cond->monitor); + return; + } + if (broadcast) { + cond->to_unblock += nsignal = cond->blocked; + cond->blocked = 0; + } else { + nsignal = 1; + cond->to_unblock++; + cond->blocked--; + } + } else if (cond->blocked > cond->gone) { + WaitForSingleObject(cond->sem_gate, INFINITE); + if (cond->gone != 0) { + cond->blocked -= cond->gone; + cond->gone = 0; + } + if (broadcast) { + nsignal = cond->to_unblock = cond->blocked; + cond->blocked = 0; + } else { + nsignal = cond->to_unblock = 1; + cond->blocked--; + } + } + LeaveCriticalSection(&cond->monitor); + + if (0 < nsignal) + ReleaseSemaphore(cond->sem_queue, nsignal, NULL); +} + +static int impl_cond_do_wait(cnd_t *cond, mtx_t *mtx, const struct timespec *ts) +{ + int nleft = 0; + int ngone = 0; + int timeout = 0; + DWORD w; + + WaitForSingleObject(cond->sem_gate, INFINITE); + cond->blocked++; + ReleaseSemaphore(cond->sem_gate, 1, NULL); + + mtx_unlock(mtx); + + w = WaitForSingleObject(cond->sem_queue, ts ? impl_timespec2msec(ts) : INFINITE); + timeout = (w == WAIT_TIMEOUT); + + EnterCriticalSection(&cond->monitor); + if ((nleft = cond->to_unblock) != 0) { + if (timeout) { + if (cond->blocked != 0) { + cond->blocked--; + } else { + cond->gone++; + } + } + if (--cond->to_unblock == 0) { + if (cond->blocked != 0) { + ReleaseSemaphore(cond->sem_gate, 1, NULL); + nleft = 0; + } + else if ((ngone = cond->gone) != 0) { + cond->gone = 0; + } + } + } else if (++cond->gone == INT_MAX/2) { + WaitForSingleObject(cond->sem_gate, INFINITE); + cond->blocked -= cond->gone; + ReleaseSemaphore(cond->sem_gate, 1, NULL); + cond->gone = 0; + } + LeaveCriticalSection(&cond->monitor); + + if (nleft == 1) { + while (ngone--) + WaitForSingleObject(cond->sem_queue, INFINITE); + ReleaseSemaphore(cond->sem_gate, 1, NULL); + } + + mtx_lock(mtx); + return timeout ? 
        thrd_busy : thrd_success;
+}
+#endif // ifndef EMULATED_THREADS_USE_NATIVE_CV
+
+static struct impl_tss_dtor_entry {
+    tss_t key;
+    tss_dtor_t dtor;
+} impl_tss_dtor_tbl[EMULATED_THREADS_TSS_DTOR_SLOTNUM];
+
+static int impl_tss_dtor_register(tss_t key, tss_dtor_t dtor)
+{
+    int i;
+    for (i = 0; i < EMULATED_THREADS_TSS_DTOR_SLOTNUM; i++) {
+        if (!impl_tss_dtor_tbl[i].dtor)
+            break;
+    }
+    if (i == EMULATED_THREADS_TSS_DTOR_SLOTNUM)
+        return 1;
+    impl_tss_dtor_tbl[i].key = key;
+    impl_tss_dtor_tbl[i].dtor = dtor;
+    return 0;
+}
+
+static void impl_tss_dtor_invoke()
+{
+    int i;
+    for (i = 0; i < EMULATED_THREADS_TSS_DTOR_SLOTNUM; i++) {
+        if (impl_tss_dtor_tbl[i].dtor) {
+            void* val = tss_get(impl_tss_dtor_tbl[i].key);
+            if (val)
+                (impl_tss_dtor_tbl[i].dtor)(val);
+        }
+    }
+}
+
+
+/*--------------- 7.25.2 Initialization functions ---------------*/
+// 7.25.2.1
+static inline void
+call_once(once_flag *flag, void (*func)(void))
+{
+    assert(flag && func);
+#ifdef EMULATED_THREADS_USE_NATIVE_CALL_ONCE
+    {
+    struct impl_call_once_param param;
+    param.func = func;
+    InitOnceExecuteOnce(flag, impl_call_once_callback, (PVOID)&param, NULL);
+    }
+#else
+    if (InterlockedCompareExchange(&flag->status, 1, 0) == 0) {
+        (func)();
+        InterlockedExchange(&flag->status, 2);
+    } else {
+        while (flag->status == 1) {
+            // busy loop!
+            thrd_yield();
+        }
+    }
+#endif
+}
+
+
+/*------------- 7.25.3 Condition variable functions -------------*/
+// 7.25.3.1
+static inline int
+cnd_broadcast(cnd_t *cond)
+{
+    if (!cond) return thrd_error;
+#ifdef EMULATED_THREADS_USE_NATIVE_CV
+    WakeAllConditionVariable(&cond->condvar);
+#else
+    impl_cond_do_signal(cond, 1);
+#endif
+    return thrd_success;
+}
+
+// 7.25.3.2
+static inline void
+cnd_destroy(cnd_t *cond)
+{
+    assert(cond);
+#ifdef EMULATED_THREADS_USE_NATIVE_CV
+    // do nothing
+#else
+    CloseHandle(cond->sem_queue);
+    CloseHandle(cond->sem_gate);
+    DeleteCriticalSection(&cond->monitor);
+#endif
+}
+
+// 7.25.3.3
+static inline int
+cnd_init(cnd_t *cond)
+{
+    if (!cond) return thrd_error;
+#ifdef EMULATED_THREADS_USE_NATIVE_CV
+    InitializeConditionVariable(&cond->condvar);
+#else
+    cond->blocked = 0;
+    cond->gone = 0;
+    cond->to_unblock = 0;
+    cond->sem_queue = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
+    cond->sem_gate = CreateSemaphore(NULL, 1, 1, NULL);
+    InitializeCriticalSection(&cond->monitor);
+#endif
+    return thrd_success;
+}
+
+// 7.25.3.4
+static inline int
+cnd_signal(cnd_t *cond)
+{
+    if (!cond) return thrd_error;
+#ifdef EMULATED_THREADS_USE_NATIVE_CV
+    WakeConditionVariable(&cond->condvar);
+#else
+    impl_cond_do_signal(cond, 0);
+#endif
+    return thrd_success;
+}
+
+// 7.25.3.5
+static inline int
+cnd_timedwait(cnd_t *cond, mtx_t *mtx, const struct timespec *abs_time)
+{
+    if (!cond || !mtx || !abs_time) return thrd_error;
+#ifdef EMULATED_THREADS_USE_NATIVE_CV
+    if (SleepConditionVariableCS(&cond->condvar, mtx, impl_timespec2msec(abs_time)))
+        return thrd_success;
+    return (GetLastError() == ERROR_TIMEOUT) ?
thrd_busy : thrd_error; +#else + return impl_cond_do_wait(cond, mtx, abs_time); +#endif +} + +// 7.25.3.6 +static inline int +cnd_wait(cnd_t *cond, mtx_t *mtx) +{ + if (!cond || !mtx) return thrd_error; +#ifdef EMULATED_THREADS_USE_NATIVE_CV + SleepConditionVariableCS(&cond->condvar, mtx, INFINITE); +#else + impl_cond_do_wait(cond, mtx, NULL); +#endif + return thrd_success; +} + + +/*-------------------- 7.25.4 Mutex functions --------------------*/ +// 7.25.4.1 +static inline void +mtx_destroy(mtx_t *mtx) +{ + assert(mtx); + DeleteCriticalSection(mtx); +} + +// 7.25.4.2 +static inline int +mtx_init(mtx_t *mtx, int type) +{ + if (!mtx) return thrd_error; + if (type != mtx_plain && type != mtx_timed && type != mtx_try + && type != (mtx_plain|mtx_recursive) + && type != (mtx_timed|mtx_recursive) + && type != (mtx_try|mtx_recursive)) + return thrd_error; + InitializeCriticalSection(mtx); + return thrd_success; +} + +// 7.25.4.3 +static inline int +mtx_lock(mtx_t *mtx) +{ + if (!mtx) return thrd_error; + EnterCriticalSection(mtx); + return thrd_success; +} + +// 7.25.4.4 +static inline int +mtx_timedlock(mtx_t *mtx, const struct timespec *ts) +{ + time_t expire, now; + if (!mtx || !ts) return thrd_error; + expire = time(NULL); + expire += ts->tv_sec; + while (mtx_trylock(mtx) != thrd_success) { + now = time(NULL); + if (expire < now) + return thrd_busy; + // busy loop! + thrd_yield(); + } + return thrd_success; +} + +// 7.25.4.5 +static inline int +mtx_trylock(mtx_t *mtx) +{ + if (!mtx) return thrd_error; + return TryEnterCriticalSection(mtx) ? thrd_success : thrd_busy; +} + +// 7.25.4.6 +static inline int +mtx_unlock(mtx_t *mtx) +{ + if (!mtx) return thrd_error; + LeaveCriticalSection(mtx); + return thrd_success; +} + + +/*------------------- 7.25.5 Thread functions -------------------*/ +// 7.25.5.1 +static inline int +thrd_create(thrd_t *thr, thrd_start_t func, void *arg) +{ + struct impl_thrd_param *pack; + uintptr_t handle; + if (!thr) return thrd_error; + pack = (struct impl_thrd_param *)malloc(sizeof(struct impl_thrd_param)); + if (!pack) return thrd_nomem; + pack->func = func; + pack->arg = arg; + handle = _beginthreadex(NULL, 0, impl_thrd_routine, pack, 0, NULL); + if (handle == 0) { + if (errno == EAGAIN || errno == EACCES) + return thrd_nomem; + return thrd_error; + } + *thr = (thrd_t)handle; + return thrd_success; +} + +#if 0 +// 7.25.5.2 +static inline thrd_t +thrd_current(void) +{ + HANDLE hCurrentThread; + BOOL bRet; + + /* GetCurrentThread() returns a pseudo-handle, which we need + * to pass to DuplicateHandle(). Only the resulting handle can be used + * from other threads. + * + * Note that neither handle can be compared to the one by thread_create. + * Only the thread IDs - as returned by GetThreadId() and GetCurrentThreadId() + * can be compared directly. + * + * Other potential solutions would be: + * - define thrd_t as a thread Ids, but this would mean we'd need to OpenThread for many operations + * - use malloc'ed memory for thrd_t. This would imply using TLS for current thread. + * + * Neither is particularly nice. + * + * Life would be much easier if C11 threads had different abstractions for + * threads and thread IDs, just like C++11 threads does... 
+ */ + + bRet = DuplicateHandle(GetCurrentProcess(), // source process (pseudo) handle + GetCurrentThread(), // source (pseudo) handle + GetCurrentProcess(), // target process + &hCurrentThread, // target handle + 0, + FALSE, + DUPLICATE_SAME_ACCESS); + assert(bRet); + if (!bRet) { + hCurrentThread = GetCurrentThread(); + } + return hCurrentThread; +} +#endif + +// 7.25.5.3 +static inline int +thrd_detach(thrd_t thr) +{ + CloseHandle(thr); + return thrd_success; +} + +// 7.25.5.4 +static inline int +thrd_equal(thrd_t thr0, thrd_t thr1) +{ + return GetThreadId(thr0) == GetThreadId(thr1); +} + +// 7.25.5.5 +static inline void +thrd_exit(int res) +{ + impl_tss_dtor_invoke(); + _endthreadex((unsigned)res); +} + +// 7.25.5.6 +static inline int +thrd_join(thrd_t thr, int *res) +{ + DWORD w, code; + w = WaitForSingleObject(thr, INFINITE); + if (w != WAIT_OBJECT_0) + return thrd_error; + if (res) { + if (!GetExitCodeThread(thr, &code)) { + CloseHandle(thr); + return thrd_error; + } + *res = (int)code; + } + CloseHandle(thr); + return thrd_success; +} + +// 7.25.5.7 +static inline void +thrd_sleep(const struct timespec *time_point, struct timespec *remaining) +{ + assert(time_point); + assert(!remaining); /* not implemented */ + Sleep(impl_timespec2msec(time_point)); +} + +// 7.25.5.8 +static inline void +thrd_yield(void) +{ + SwitchToThread(); +} + + +/*----------- 7.25.6 Thread-specific storage functions -----------*/ +// 7.25.6.1 +static inline int +tss_create(tss_t *key, tss_dtor_t dtor) +{ + if (!key) return thrd_error; + *key = TlsAlloc(); + if (dtor) { + if (impl_tss_dtor_register(*key, dtor)) { + TlsFree(*key); + return thrd_error; + } + } + return (*key != 0xFFFFFFFF) ? thrd_success : thrd_error; +} + +// 7.25.6.2 +static inline void +tss_delete(tss_t key) +{ + TlsFree(key); +} + +// 7.25.6.3 +static inline void * +tss_get(tss_t key) +{ + return TlsGetValue(key); +} + +// 7.25.6.4 +static inline int +tss_set(tss_t key, void *val) +{ + return TlsSetValue(key, val) ? thrd_success : thrd_error; +} + + +/*-------------------- 7.25.7 Time functions --------------------*/ +// 7.25.6.1 +#ifndef HAVE_TIMESPEC_GET +static inline int +timespec_get(struct timespec *ts, int base) +{ + if (!ts) return 0; + if (base == TIME_UTC) { + ts->tv_sec = time(NULL); + ts->tv_nsec = 0; + return base; + } + return 0; +} +#endif diff --git a/src/mesa/c11_compat.h b/src/mesa/c11_compat.h new file mode 100644 index 00000000..d35740f4 --- /dev/null +++ b/src/mesa/c11_compat.h @@ -0,0 +1,27 @@ +/* Copyright 2019 Intel Corporation */ +/* SPDX-License-Identifier: MIT */ + +#include "no_extern_c.h" + +#ifndef _C11_COMPAT_H_ +#define _C11_COMPAT_H_ + +#if defined(__cplusplus) + /* This is C++ code, not C */ +#elif (__STDC_VERSION__ >= 201112L) + /* Already C11 */ +#else + + +/* + * C11 static_assert() macro + * assert.h only defines that name for C11 and above + */ +#ifndef static_assert +#define static_assert _Static_assert +#endif + + +#endif /* !C++ && !C11 */ + +#endif /* _C11_COMPAT_H_ */ diff --git a/src/mesa/c99_compat.h b/src/mesa/c99_compat.h new file mode 100644 index 00000000..729b5b79 --- /dev/null +++ b/src/mesa/c99_compat.h @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2007-2013 VMware, Inc. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "no_extern_c.h"
+
+#ifndef _C99_COMPAT_H_
+#define _C99_COMPAT_H_
+
+
+/*
+ * MSVC hacks.
+ */
+#if defined(_MSC_VER)
+
+# if _MSC_VER < 1900
+# error "Microsoft Visual Studio 2015 or higher required"
+# endif
+
+  /*
+   * Visual Studio will complain if we define the `inline` keyword, but
+   * actually it only supports the keyword on C++.
+   *
+   * To avoid this the _ALLOW_KEYWORD_MACROS must be set.
+   */
+# if !defined(_ALLOW_KEYWORD_MACROS)
+# define _ALLOW_KEYWORD_MACROS
+# endif
+
+  /*
+   * XXX: MSVC has a `__restrict` keyword, but it also has a
+   * `__declspec(restrict)` modifier, so it is impossible to define a
+   * `restrict` macro without interfering with the latter. Furthermore the
+   * MSVC standard library uses __declspec(restrict) under the _CRTRESTRICT
+   * macro. For now resolve this issue by redefining _CRTRESTRICT, but going
+   * forward we should probably stop using restrict, especially
+   * considering that our code does not obey strict aliasing rules anyway.
+ */
+# include <malloc.h>
+# undef _CRTRESTRICT
+# define _CRTRESTRICT
+#endif
+
+
+/*
+ * C99 inline keyword
+ */
+#ifndef inline
+# ifdef __cplusplus
+   /* C++ supports inline keyword */
+# elif defined(__GNUC__)
+# define inline __inline__
+# elif defined(_MSC_VER)
+# define inline __inline
+# elif defined(__ICL)
+# define inline __inline
+# elif defined(__INTEL_COMPILER)
+   /* Intel compiler supports inline keyword */
+# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+# define inline __inline
+# elif (__STDC_VERSION__ >= 199901L)
+   /* C99 supports inline keyword */
+# else
+# define inline
+# endif
+#endif
+
+
+/*
+ * C99 restrict keyword
+ *
+ * See also:
+ * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
+ */
+#ifndef restrict
+# if (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus)
+   /* C99 */
+# elif defined(__GNUC__)
+# define restrict __restrict__
+# elif defined(_MSC_VER)
+# define restrict __restrict
+# else
+# define restrict /* */
+# endif
+#endif
+
+
+/*
+ * C99 __func__ macro
+ */
+#ifndef __func__
+# if (__STDC_VERSION__ >= 199901L)
+   /* C99 */
+# elif defined(__GNUC__)
+# define __func__ __FUNCTION__
+# elif defined(_MSC_VER)
+# define __func__ __FUNCTION__
+# else
+# define __func__ ""
+# endif
+#endif
+
+
+/* Simple test case for debugging */
+#if 0
+static inline const char *
+test_c99_compat_h(const void * restrict a,
+                  const void * restrict b)
+{
+   return __func__;
+}
+#endif
+
+
+/* Fallback definitions, for scons which doesn't auto-detect these things. */
+#ifdef HAVE_SCONS
+
+# ifndef _WIN32
+# define HAVE_PTHREAD
+# define HAVE_POSIX_MEMALIGN
+# endif
+
+# ifdef __GNUC__
+# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
+# error "GCC version 4.2 or higher required"
+# endif
+
+   /* https://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Other-Builtins.html */
+# define HAVE___BUILTIN_CLZ 1
+# define HAVE___BUILTIN_CLZLL 1
+# define HAVE___BUILTIN_CTZ 1
+# define HAVE___BUILTIN_EXPECT 1
+# define HAVE___BUILTIN_FFS 1
+# define HAVE___BUILTIN_FFSLL 1
+# define HAVE___BUILTIN_POPCOUNT 1
+# define HAVE___BUILTIN_POPCOUNTLL 1
+   /* https://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Function-Attributes.html */
+# define HAVE_FUNC_ATTRIBUTE_FLATTEN 1
+# define HAVE_FUNC_ATTRIBUTE_UNUSED 1
+# define HAVE_FUNC_ATTRIBUTE_FORMAT 1
+# define HAVE_FUNC_ATTRIBUTE_PACKED 1
+# define HAVE_FUNC_ATTRIBUTE_ALIAS 1
+# define HAVE_FUNC_ATTRIBUTE_NORETURN 1
+
+# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+   /* https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html */
+# define HAVE___BUILTIN_BSWAP32 1
+# define HAVE___BUILTIN_BSWAP64 1
+# endif
+
+# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+# define HAVE___BUILTIN_UNREACHABLE 1
+# endif
+
+# endif /* __GNUC__ */
+
+#endif /* HAVE_SCONS */
+
+
+#endif /* _C99_COMPAT_H_ */
diff --git a/src/mesa/fast_urem_by_const.h b/src/mesa/fast_urem_by_const.h
new file mode 100644
index 00000000..beb253d2
--- /dev/null
+++ b/src/mesa/fast_urem_by_const.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2010 Valve Software
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+/*
+ * Code for fast 32-bit unsigned remainder, based off of "Faster Remainder by
+ * Direct Computation: Applications to Compilers and Software Libraries,"
+ * available at https://arxiv.org/pdf/1902.01961.pdf.
+ *
+ * util_fast_urem32(n, d, REMAINDER_MAGIC(d)) returns the same thing as
+ * n % d for any unsigned n and d, however it compiles down to only a few
+ * multiplications, so it should be faster than plain uint32_t modulo if the
+ * same divisor is used many times.
+ */
+
+#define REMAINDER_MAGIC(divisor) \
+   ((uint64_t) ~0ull / (divisor) + 1)
+
+/*
+ * Get bits 64-96 of a 32x64-bit multiply. If __int128_t is available, we use
+ * it, which usually compiles down to one instruction on 64-bit architectures.
+ * Otherwise on 32-bit architectures we usually get four instructions (one
+ * 32x32->64 multiply, one 32x32->32 multiply, and one 64-bit add).
+ */
+
+static inline uint32_t
+_mul32by64_hi(uint32_t a, uint64_t b)
+{
+#ifdef HAVE_UINT128
+   return ((__uint128_t) b * a) >> 64;
+#else
+   /*
+    * Let b = b0 + 2^32 * b1. Then a * b = a * b0 + 2^32 * a * b1. We would
+    * have to do a 96-bit addition to get the full result, except that only
+    * one term has non-zero lower 32 bits, which means that to get the high 32
+    * bits, we only have to add the high 64 bits of each term. Unfortunately,
+    * we have to do the 64-bit addition in case the low 32 bits overflow.
+    */
+   uint32_t b0 = (uint32_t) b;
+   uint32_t b1 = b >> 32;
+   return ((((uint64_t) a * b0) >> 32) + (uint64_t) a * b1) >> 32;
+#endif
+}
+
+static inline uint32_t
+util_fast_urem32(uint32_t n, uint32_t d, uint64_t magic)
+{
+   uint64_t lowbits = magic * n;
+   uint32_t result = _mul32by64_hi(d, lowbits);
+   assert(result == n % d);
+   return result;
+}
+
diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
new file mode 100644
index 00000000..1b1d9546
--- /dev/null
+++ b/src/mesa/main/hash.c
@@ -0,0 +1,425 @@
+/**
+ * \file hash.c
+ * Generic hash table.
+ *
+ * Used for display lists, texture objects, vertex/fragment programs,
+ * buffer objects, etc. The hash functions are thread-safe.
+ *
+ * \note key=0 is illegal.
+ *
+ * \author Brian Paul
+ */
+
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+//#include "errors.h"
+#include <stdlib.h>
+#include "hash.h"
+#include "../util/hash_table.h"
+
+
+/**
+ * Create a new hash table.
+ *
+ * \return pointer to a new, empty hash table.
+ */
+struct _mesa_HashTable *
+_mesa_NewHashTable(void)
+{
+   struct _mesa_HashTable *table = CALLOC_STRUCT(_mesa_HashTable);
+
+   if (table) {
+      table->ht = _mesa_hash_table_create(NULL, uint_key_hash,
+                                          uint_key_compare);
+      if (table->ht == NULL) {
+         free(table);
+         //_mesa_error_no_memory(__func__);
+         return NULL;
+      }
+
+      _mesa_hash_table_set_deleted_key(table->ht, uint_key(DELETED_KEY_VALUE));
+      /*
+       * Needs to be recursive, since the callback in _mesa_HashWalk()
+       * is allowed to call _mesa_HashRemove().
+       */
+      mtx_init(&table->Mutex, mtx_recursive);
+   }
+   else {
+      //_mesa_error_no_memory(__func__);
+   }
+
+   return table;
+}
+
+
+
+/**
+ * Delete a hash table.
+ * Frees each entry on the hash table and then the hash table structure itself.
+ * Note that the caller should have already traversed the table and deleted
+ * the objects in the table (i.e. We don't free the entries' data pointer).
+ *
+ * \param table the hash table to delete.
+ */
+void
+_mesa_DeleteHashTable(struct _mesa_HashTable *table)
+{
+   assert(table);
+
+   if (_mesa_hash_table_next_entry(table->ht, NULL) != NULL) {
+      // _mesa_problem(NULL, "In _mesa_DeleteHashTable, found non-freed data");
+   }
+
+   _mesa_hash_table_destroy(table->ht, NULL);
+
+   mtx_destroy(&table->Mutex);
+   free(table);
+}
+
+
+
+/**
+ * Lookup an entry in the hash table, without locking.
+ * \sa _mesa_HashLookup
+ */
+static inline void *
+_mesa_HashLookup_unlocked(struct _mesa_HashTable *table, GLuint key)
+{
+   const struct hash_entry *entry;
+
+   assert(table);
+   assert(key);
+
+   if (key == DELETED_KEY_VALUE)
+      return table->deleted_key_data;
+
+   entry = _mesa_hash_table_search_pre_hashed(table->ht,
+                                              uint_hash(key),
+                                              uint_key(key));
+   if (!entry)
+      return NULL;
+
+   return entry->data;
+}
+
+
+/**
+ * Lookup an entry in the hash table.
+ *
+ * \param table the hash table.
+ * \param key the key.
+ *
+ * \return pointer to user's data or NULL if key not in table
+ */
+void *
+_mesa_HashLookup(struct _mesa_HashTable *table, GLuint key)
+{
+   void *res;
+   _mesa_HashLockMutex(table);
+   res = _mesa_HashLookup_unlocked(table, key);
+   _mesa_HashUnlockMutex(table);
+   return res;
+}
+
+
+/**
+ * Lookup an entry in the hash table without locking the mutex.
+ *
+ * The hash table mutex must be locked manually by calling
+ * _mesa_HashLockMutex() before calling this function.
+ *
+ * \param table the hash table.
+ * \param key the key.
+ * + * \return pointer to user's data or NULL if key not in table + */ +void * +_mesa_HashLookupLocked(struct _mesa_HashTable *table, GLuint key) +{ + return _mesa_HashLookup_unlocked(table, key); +} + + +static inline void +_mesa_HashInsert_unlocked(struct _mesa_HashTable *table, GLuint key, void *data) +{ + uint32_t hash = uint_hash(key); + struct hash_entry *entry; + + assert(table); + assert(key); + + if (key > table->MaxKey) + table->MaxKey = key; + + if (key == DELETED_KEY_VALUE) { + table->deleted_key_data = data; + } else { + entry = _mesa_hash_table_search_pre_hashed(table->ht, hash, uint_key(key)); + if (entry) { + entry->data = data; + } else { + _mesa_hash_table_insert_pre_hashed(table->ht, hash, uint_key(key), data); + } + } +} + + +/** + * Insert a key/pointer pair into the hash table without locking the mutex. + * If an entry with this key already exists we'll replace the existing entry. + * + * The hash table mutex must be locked manually by calling + * _mesa_HashLockMutex() before calling this function. + * + * \param table the hash table. + * \param key the key (not zero). + * \param data pointer to user data. + */ +void +_mesa_HashInsertLocked(struct _mesa_HashTable *table, GLuint key, void *data) +{ + _mesa_HashInsert_unlocked(table, key, data); +} + + +/** + * Insert a key/pointer pair into the hash table. + * If an entry with this key already exists we'll replace the existing entry. + * + * \param table the hash table. + * \param key the key (not zero). + * \param data pointer to user data. + */ +void +_mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data) +{ + _mesa_HashLockMutex(table); + _mesa_HashInsert_unlocked(table, key, data); + _mesa_HashUnlockMutex(table); +} + + +/** + * Remove an entry from the hash table. + * + * \param table the hash table. + * \param key key of entry to remove. + * + * While holding the hash table's lock, searches the entry with the matching + * key and unlinks it. + */ +static inline void +_mesa_HashRemove_unlocked(struct _mesa_HashTable *table, GLuint key) +{ + struct hash_entry *entry; + + assert(table); + assert(key); + + /* assert if _mesa_HashRemove illegally called from _mesa_HashDeleteAll + * callback function. Have to check this outside of mutex lock. + */ + assert(!table->InDeleteAll); + + if (key == DELETED_KEY_VALUE) { + table->deleted_key_data = NULL; + } else { + entry = _mesa_hash_table_search_pre_hashed(table->ht, + uint_hash(key), + uint_key(key)); + _mesa_hash_table_remove(table->ht, entry); + } +} + + +void +_mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint key) +{ + _mesa_HashRemove_unlocked(table, key); +} + +void +_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key) +{ + _mesa_HashLockMutex(table); + _mesa_HashRemove_unlocked(table, key); + _mesa_HashUnlockMutex(table); +} + +/** + * Delete all entries in a hash table, but don't delete the table itself. + * Invoke the given callback function for each table entry. 
+ * + * \param table the hash table to delete + * \param callback the callback function + * \param userData arbitrary pointer to pass along to the callback + * (this is typically a struct gl_context pointer) + */ +void +_mesa_HashDeleteAll(struct _mesa_HashTable *table, + void (*callback)(GLuint key, void *data, void *userData), + void *userData) +{ + assert(callback); + _mesa_HashLockMutex(table); + table->InDeleteAll = GL_TRUE; + hash_table_foreach(table->ht, entry) { + callback((uintptr_t)entry->key, entry->data, userData); + _mesa_hash_table_remove(table->ht, entry); + } + if (table->deleted_key_data) { + callback(DELETED_KEY_VALUE, table->deleted_key_data, userData); + table->deleted_key_data = NULL; + } + table->InDeleteAll = GL_FALSE; + _mesa_HashUnlockMutex(table); +} + + +/** + * Walk over all entries in a hash table, calling callback function for each. + * \param table the hash table to walk + * \param callback the callback function + * \param userData arbitrary pointer to pass along to the callback + * (this is typically a struct gl_context pointer) + */ +static void +hash_walk_unlocked(const struct _mesa_HashTable *table, + void (*callback)(GLuint key, void *data, void *userData), + void *userData) +{ + assert(table); + assert(callback); + + hash_table_foreach(table->ht, entry) { + callback((uintptr_t)entry->key, entry->data, userData); + } + if (table->deleted_key_data) + callback(DELETED_KEY_VALUE, table->deleted_key_data, userData); +} + + +void +_mesa_HashWalk(const struct _mesa_HashTable *table, + void (*callback)(GLuint key, void *data, void *userData), + void *userData) +{ + /* cast-away const */ + struct _mesa_HashTable *table2 = (struct _mesa_HashTable *) table; + + _mesa_HashLockMutex(table2); + hash_walk_unlocked(table, callback, userData); + _mesa_HashUnlockMutex(table2); +} + +void +_mesa_HashWalkLocked(const struct _mesa_HashTable *table, + void (*callback)(GLuint key, void *data, void *userData), + void *userData) +{ + hash_walk_unlocked(table, callback, userData); +} + +static void +debug_print_entry(GLuint key, void *data, void *userData) +{ + //_mesa_debug(NULL, "%u %p\n", key, data); +} + +/** + * Dump contents of hash table for debugging. + * + * \param table the hash table. + */ +void +_mesa_HashPrint(const struct _mesa_HashTable *table) +{ + if (table->deleted_key_data) + debug_print_entry(DELETED_KEY_VALUE, table->deleted_key_data, NULL); + _mesa_HashWalk(table, debug_print_entry, NULL); +} + + +/** + * Find a block of adjacent unused hash keys. + * + * \param table the hash table. + * \param numKeys number of keys needed. + * + * \return Starting key of free block or 0 if failure. + * + * If there are enough free keys between the maximum key existing in the table + * (_mesa_HashTable::MaxKey) and the maximum key possible, then simply return + * the adjacent key. Otherwise do a full search for a free key block in the + * allowable key range. 
+ */
+GLuint
+_mesa_HashFindFreeKeyBlock(struct _mesa_HashTable *table, GLuint numKeys)
+{
+   const GLuint maxKey = ~((GLuint) 0) - 1;
+   if (maxKey - numKeys > table->MaxKey) {
+      /* the quick solution */
+      return table->MaxKey + 1;
+   }
+   else {
+      /* the slow solution */
+      GLuint freeCount = 0;
+      GLuint freeStart = 1;
+      GLuint key;
+      for (key = 1; key != maxKey; key++) {
+         if (_mesa_HashLookup_unlocked(table, key)) {
+            /* darn, this key is already in use */
+            freeCount = 0;
+            freeStart = key+1;
+         }
+         else {
+            /* this key not in use, check if we've found enough */
+            freeCount++;
+            if (freeCount == numKeys) {
+               return freeStart;
+            }
+         }
+      }
+      /* cannot allocate a block of numKeys consecutive keys */
+      return 0;
+   }
+}
+
+
+/**
+ * Return the number of entries in the hash table.
+ */
+GLuint
+_mesa_HashNumEntries(const struct _mesa_HashTable *table)
+{
+   GLuint count = 0;
+
+   if (table->deleted_key_data)
+      count++;
+
+   count += _mesa_hash_table_num_entries(table->ht);
+
+   return count;
+}
diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h
new file mode 100644
index 00000000..9c5e9ffe
--- /dev/null
+++ b/src/mesa/main/hash.h
@@ -0,0 +1,191 @@
+/**
+ * \file hash.h
+ * Generic hash table.
+ */
+
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef HASH_H
+#define HASH_H
+
+
+#include <stdbool.h>
+#include <stdint.h>
+//#include "imports.h"
+#include "../c11/threads.h"
+
+/**********************************************************************/
+/** Memory macros */
+/*@{*/
+
+/** Allocate a structure of type \p T */
+#define MALLOC_STRUCT(T)   (struct T *) malloc(sizeof(struct T))
+/** Allocate and zero a structure of type \p T */
+#define CALLOC_STRUCT(T)   (struct T *) calloc(1, sizeof(struct T))
+
+/*@}*/
+
+/**
+ * Magic GLuint object name that gets stored outside of the struct hash_table.
+ *
+ * The hash table needs a particular pointer to be the marker for a key that
+ * was deleted from the table, along with NULL for the "never allocated in the
+ * table" marker. Legacy GL allows any GLuint to be used as a GL object name,
+ * and we use a 1:1 mapping from GLuints to key pointers, so we need to be
+ * able to track a GLuint that happens to match the deleted key outside of
+ * struct hash_table. We tell the hash table to use "1" as the deleted key
+ * value, so that we test the deleted-key-in-the-table path as best we can.
+ */ +#define DELETED_KEY_VALUE 1 + +/** @{ + * Mapping from our use of GLuint as both the key and the hash value to the + * hash_table.h API + * + * There exist many integer hash functions, designed to avoid collisions when + * the integers are spread across key space with some patterns. In GL, the + * pattern (in the case of glGen*()ed object IDs) is that the keys are unique + * contiguous integers starting from 1. Because of that, we just use the key + * as the hash value, to minimize the cost of the hash function. If objects + * are never deleted, we will never see a collision in the table, because the + * table resizes itself when it approaches full, and thus key % table_size == + * key. + * + * The case where we could have collisions for genned objects would be + * something like: glGenBuffers(&a, 100); glDeleteBuffers(&a + 50, 50); + * glGenBuffers(&b, 100), because objects 1-50 and 101-200 are allocated at + * the end of that sequence, instead of 1-150. So far it doesn't appear to be + * a problem. + */ +static inline bool +uint_key_compare(const void *a, const void *b) +{ + return a == b; +} + +static inline uint32_t +uint_hash(GLuint id) +{ + return id; +} + +static inline uint32_t +uint_key_hash(const void *key) +{ + return uint_hash((uintptr_t)key); +} + +static inline void * +uint_key(GLuint id) +{ + return (void *)(uintptr_t) id; +} +/** @} */ + +/** + * The hash table data structure. + */ +struct _mesa_HashTable { + struct hash_table *ht; + GLuint MaxKey; /**< highest key inserted so far */ + mtx_t Mutex; /**< mutual exclusion lock */ + GLboolean InDeleteAll; /**< Debug check */ + /** Value that would be in the table for DELETED_KEY_VALUE. */ + void *deleted_key_data; +}; + +extern struct _mesa_HashTable *_mesa_NewHashTable(void); + +extern void _mesa_DeleteHashTable(struct _mesa_HashTable *table); + +extern void *_mesa_HashLookup(struct _mesa_HashTable *table, GLuint key); + +extern void _mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data); + +extern void _mesa_HashRemove(struct _mesa_HashTable *table, GLuint key); + +/** + * Lock the hash table mutex. + * + * This function should be used when multiple objects need + * to be looked up in the hash table, to avoid having to lock + * and unlock the mutex each time. + * + * \param table the hash table. + */ +static inline void +_mesa_HashLockMutex(struct _mesa_HashTable *table) +{ + assert(table); + mtx_lock(&table->Mutex); +} + + +/** + * Unlock the hash table mutex. + * + * \param table the hash table. 
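+ *
+ * A typical pairing with _mesa_HashLockMutex() looks like this (an
+ * illustrative sketch only; obj1/obj2/name1/name2 are placeholders):
+ *
+ *   _mesa_HashLockMutex(table);
+ *   obj1 = _mesa_HashLookupLocked(table, name1);
+ *   obj2 = _mesa_HashLookupLocked(table, name2);
+ *   _mesa_HashUnlockMutex(table);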
+ */
+static inline void
+_mesa_HashUnlockMutex(struct _mesa_HashTable *table)
+{
+   assert(table);
+   mtx_unlock(&table->Mutex);
+}
+
+extern void *_mesa_HashLookupLocked(struct _mesa_HashTable *table, GLuint key);
+
+extern void _mesa_HashInsertLocked(struct _mesa_HashTable *table,
+                                   GLuint key, void *data);
+
+extern void _mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint key);
+
+extern void
+_mesa_HashDeleteAll(struct _mesa_HashTable *table,
+                    void (*callback)(GLuint key, void *data, void *userData),
+                    void *userData);
+
+extern void
+_mesa_HashWalk(const struct _mesa_HashTable *table,
+               void (*callback)(GLuint key, void *data, void *userData),
+               void *userData);
+
+extern void
+_mesa_HashWalkLocked(const struct _mesa_HashTable *table,
+                     void (*callback)(GLuint key, void *data, void *userData),
+                     void *userData);
+
+extern void _mesa_HashPrint(const struct _mesa_HashTable *table);
+
+extern GLuint _mesa_HashFindFreeKeyBlock(struct _mesa_HashTable *table, GLuint numKeys);
+
+extern GLuint
+_mesa_HashNumEntries(const struct _mesa_HashTable *table);
+
+extern void _mesa_test_hash_functions(void);
+
+
+#endif
diff --git a/src/mesa/no_extern_c.h b/src/mesa/no_extern_c.h
new file mode 100644
index 00000000..f79602c0
--- /dev/null
+++ b/src/mesa/no_extern_c.h
@@ -0,0 +1,48 @@
+/**************************************************************************
+ *
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/*
+ * Including system headers inside `extern "C" { ... }` is not safe, as system
+ * headers may have C++ code in them, and C++ code inside extern "C"
+ * leads to syntactically incorrect code.
+ *
+ * This is because putting code inside extern "C" won't make the __cplusplus
+ * define go away; that is, the system header being included thinks it is
+ * free to use C++ as it sees fit.
+ *
+ * Including non-system headers inside extern "C" is not safe either, because
+ * non-system headers end up including system headers, hence fall into the
+ * above case too.
+ *
+ * Conclusion: includes inside extern "C" are simply not portable.
+ *
+ *
+ * This header helps surface these issues.
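+ *
+ * For example (an illustrative sketch; "some_header.h" stands for any
+ * hypothetical header that transitively includes this file):
+ *
+ *   extern "C" {
+ *   #include "some_header.h"
+ *   }
+ *
+ * fails to compile, because the template declaration below is ill-formed
+ * inside an extern "C" block, so the error points straight at the
+ * offending include.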
+ */
+
+#ifdef __cplusplus
+template<class T> class _IncludeInsideExternCNotPortable;
+#endif
diff --git a/src/mesa/util/detect_os.h b/src/mesa/util/detect_os.h
new file mode 100644
index 00000000..6506948e
--- /dev/null
+++ b/src/mesa/util/detect_os.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2008 VMware, Inc. */
+
+/**
+ * Auto-detect the operating system family.
+ *
+ * See also:
+ * - http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
+ * - echo | gcc -dM -E - | sort
+ * - http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+ *
+ * @author José Fonseca
+ */
+
+#ifndef DETECT_OS_H
+#define DETECT_OS_H
+
+#if defined(__linux__)
+#define DETECT_OS_LINUX 1
+#define DETECT_OS_UNIX 1
+#endif
+
+/*
+ * Android defines __linux__, so DETECT_OS_LINUX and DETECT_OS_UNIX will
+ * also be defined.
+ */
+#if defined(ANDROID)
+#define DETECT_OS_ANDROID 1
+#endif
+
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#define DETECT_OS_FREEBSD 1
+#define DETECT_OS_BSD 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(__OpenBSD__)
+#define DETECT_OS_OPENBSD 1
+#define DETECT_OS_BSD 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(__NetBSD__)
+#define DETECT_OS_NETBSD 1
+#define DETECT_OS_BSD 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(__DragonFly__)
+#define DETECT_OS_DRAGONFLY 1
+#define DETECT_OS_BSD 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(__GNU__)
+#define DETECT_OS_HURD 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(__sun)
+#define DETECT_OS_SOLARIS 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(__APPLE__)
+#define DETECT_OS_APPLE 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(_WIN32) || defined(WIN32)
+#define DETECT_OS_WINDOWS 1
+#endif
+
+#if defined(__HAIKU__)
+#define DETECT_OS_HAIKU 1
+#define DETECT_OS_UNIX 1
+#endif
+
+#if defined(__CYGWIN__)
+#define DETECT_OS_CYGWIN 1
+#define DETECT_OS_UNIX 1
+#endif
+
+
+/*
+ * Make sure DETECT_OS_* are always defined, so that they can be used with #if
+ */
+#ifndef DETECT_OS_ANDROID
+#define DETECT_OS_ANDROID 0
+#endif
+#ifndef DETECT_OS_APPLE
+#define DETECT_OS_APPLE 0
+#endif
+#ifndef DETECT_OS_BSD
+#define DETECT_OS_BSD 0
+#endif
+#ifndef DETECT_OS_CYGWIN
+#define DETECT_OS_CYGWIN 0
+#endif
+#ifndef DETECT_OS_DRAGONFLY
+#define DETECT_OS_DRAGONFLY 0
+#endif
+#ifndef DETECT_OS_FREEBSD
+#define DETECT_OS_FREEBSD 0
+#endif
+#ifndef DETECT_OS_HAIKU
+#define DETECT_OS_HAIKU 0
+#endif
+#ifndef DETECT_OS_HURD
+#define DETECT_OS_HURD 0
+#endif
+#ifndef DETECT_OS_LINUX
+#define DETECT_OS_LINUX 0
+#endif
+#ifndef DETECT_OS_NETBSD
+#define DETECT_OS_NETBSD 0
+#endif
+#ifndef DETECT_OS_OPENBSD
+#define DETECT_OS_OPENBSD 0
+#endif
+#ifndef DETECT_OS_SOLARIS
+#define DETECT_OS_SOLARIS 0
+#endif
+#ifndef DETECT_OS_UNIX
+#define DETECT_OS_UNIX 0
+#endif
+#ifndef DETECT_OS_WINDOWS
+#define DETECT_OS_WINDOWS 0
+#endif
+
+#endif /* DETECT_OS_H */
diff --git a/src/mesa/util/futex.h b/src/mesa/util/futex.h
new file mode 100644
index 00000000..cf8dd020
--- /dev/null
+++ b/src/mesa/util/futex.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2015 Intel
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef UTIL_FUTEX_H
+#define UTIL_FUTEX_H
+
+#if defined(HAVE_LINUX_FUTEX_H)
+
+#include <limits.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <linux/futex.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+
+static inline long sys_futex(void *addr1, int op, int val1, const struct timespec *timeout, void *addr2, int val3)
+{
+   return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
+}
+
+static inline int futex_wake(uint32_t *addr, int count)
+{
+   return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0);
+}
+
+static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespec *timeout)
+{
+   /* FUTEX_WAIT_BITSET with FUTEX_BITSET_MATCH_ANY is equivalent to
+    * FUTEX_WAIT, except that it treats the timeout as absolute. */
+   return sys_futex(addr, FUTEX_WAIT_BITSET, value, timeout, NULL,
+                    FUTEX_BITSET_MATCH_ANY);
+}
+
+#elif defined(__FreeBSD__)
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/umtx.h>
+#include <sys/time.h>
+
+static inline int futex_wake(uint32_t *addr, int count)
+{
+   assert(count == (int)(uint32_t)count); /* Check that bits weren't discarded */
+   return _umtx_op(addr, UMTX_OP_WAKE, (uint32_t)count, NULL, NULL) == -1 ? errno : 0;
+}
+
+static inline int futex_wait(uint32_t *addr, int32_t value, struct timespec *timeout)
+{
+   void *uaddr = NULL, *uaddr2 = NULL;
+   struct _umtx_time tmo = {
+      ._flags = UMTX_ABSTIME,
+      ._clockid = CLOCK_MONOTONIC
+   };
+
+   assert(value == (int)(uint32_t)value); /* Check that bits weren't discarded */
+
+   if (timeout != NULL) {
+      tmo._timeout = *timeout;
+      uaddr = (void *)(uintptr_t)sizeof(tmo);
+      uaddr2 = (void *)&tmo;
+   }
+
+   return _umtx_op(addr, UMTX_OP_WAIT_UINT, (uint32_t)value, uaddr, uaddr2) == -1 ? errno : 0;
+}
+
+#elif defined(__OpenBSD__)
+
+#include <sys/time.h>
+#include <sys/futex.h>
+
+static inline int futex_wake(uint32_t *addr, int count)
+{
+   return futex(addr, FUTEX_WAKE, count, NULL, NULL);
+}
+
+static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespec *timeout)
+{
+   struct timespec tsrel, tsnow;
+   clock_gettime(CLOCK_MONOTONIC, &tsnow);
+   /* convert the absolute timeout to a relative one (was subtracting the
+    * uninitialized tsrel from itself, leaving tsnow unused) */
+   timespecsub(timeout, &tsnow, &tsrel);
+   return futex(addr, FUTEX_WAIT, value, &tsrel, NULL);
+}
+
+#endif
+
+#endif /* UTIL_FUTEX_H */
diff --git a/src/mesa/util/hash_table.c b/src/mesa/util/hash_table.c
new file mode 100644
index 00000000..aee3f5ee
--- /dev/null
+++ b/src/mesa/util/hash_table.c
@@ -0,0 +1,802 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ * Copyright © 1988-2004 Keith Packard and Bart Massey.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors
+ * or their institutions shall not be used in advertising or
+ * otherwise to promote the sale, use or other dealings in this
+ * Software without prior written authorization from the
+ * authors.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/**
+ * Implements an open-addressing, linear-reprobing hash table.
+ *
+ * For more information, see:
+ *
+ * http://cgit.freedesktop.org/~anholt/hash_table/tree/README
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "hash_table.h"
+#include "ralloc.h"
+#include "macros.h"
+#include "../main/hash.h"
+#include "../fast_urem_by_const.h"
+
+static const uint32_t deleted_key_value;
+
+/**
+ * From Knuth -- a good choice for hash/rehash values is p, p-2 where
+ * p and p-2 are both prime.
These tables are sized to have an extra 10% + * free to avoid exponential performance degradation as the hash table fills + */ +static const struct { + uint32_t max_entries, size, rehash; + uint64_t size_magic, rehash_magic; +} hash_sizes[] = { +#define ENTRY(max_entries, size, rehash) \ + { max_entries, size, rehash, \ + REMAINDER_MAGIC(size), REMAINDER_MAGIC(rehash) } + + ENTRY(2, 5, 3 ), + ENTRY(4, 7, 5 ), + ENTRY(8, 13, 11 ), + ENTRY(16, 19, 17 ), + ENTRY(32, 43, 41 ), + ENTRY(64, 73, 71 ), + ENTRY(128, 151, 149 ), + ENTRY(256, 283, 281 ), + ENTRY(512, 571, 569 ), + ENTRY(1024, 1153, 1151 ), + ENTRY(2048, 2269, 2267 ), + ENTRY(4096, 4519, 4517 ), + ENTRY(8192, 9013, 9011 ), + ENTRY(16384, 18043, 18041 ), + ENTRY(32768, 36109, 36107 ), + ENTRY(65536, 72091, 72089 ), + ENTRY(131072, 144409, 144407 ), + ENTRY(262144, 288361, 288359 ), + ENTRY(524288, 576883, 576881 ), + ENTRY(1048576, 1153459, 1153457 ), + ENTRY(2097152, 2307163, 2307161 ), + ENTRY(4194304, 4613893, 4613891 ), + ENTRY(8388608, 9227641, 9227639 ), + ENTRY(16777216, 18455029, 18455027 ), + ENTRY(33554432, 36911011, 36911009 ), + ENTRY(67108864, 73819861, 73819859 ), + ENTRY(134217728, 147639589, 147639587 ), + ENTRY(268435456, 295279081, 295279079 ), + ENTRY(536870912, 590559793, 590559791 ), + ENTRY(1073741824, 1181116273, 1181116271 ), + ENTRY(2147483648ul, 2362232233ul, 2362232231ul ) +}; + +static inline bool +key_pointer_is_reserved(const struct hash_table *ht, const void *key) +{ + return key == NULL || key == ht->deleted_key; +} + +static int +entry_is_free(const struct hash_entry *entry) +{ + return entry->key == NULL; +} + +static int +entry_is_deleted(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key == ht->deleted_key; +} + +static int +entry_is_present(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key != NULL && entry->key != ht->deleted_key; +} + +bool +_mesa_hash_table_init(struct hash_table *ht, + void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + ht->size_index = 0; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->key_hash_function = key_hash_function; + ht->key_equals_function = key_equals_function; + ht->table = rzalloc_array(mem_ctx, struct hash_entry, ht->size); + ht->entries = 0; + ht->deleted_entries = 0; + ht->deleted_key = &deleted_key_value; + + return ht->table != NULL; +} + +struct hash_table * +_mesa_hash_table_create(void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + struct hash_table *ht; + + /* mem_ctx is used to allocate the hash table, but the hash table is used + * to allocate all of the suballocations. 
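+    *
+    * A typical creation call therefore looks like this (an illustrative
+    * sketch only, using the string helpers declared in hash_table.h):
+    *
+    *    struct hash_table *ht =
+    *       _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+    *                               _mesa_key_string_equal);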
+ */ + ht = ralloc(mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + if (!_mesa_hash_table_init(ht, ht, key_hash_function, key_equals_function)) { + ralloc_free(ht); + return NULL; + } + + return ht; +} + +struct hash_table * +_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx) +{ + struct hash_table *ht; + + ht = ralloc(dst_mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + memcpy(ht, src, sizeof(struct hash_table)); + + ht->table = ralloc_array(ht, struct hash_entry, ht->size); + if (ht->table == NULL) { + ralloc_free(ht); + return NULL; + } + + memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry)); + + return ht; +} + +/** + * Frees the given hash table. + * + * If delete_function is passed, it gets called on each entry present before + * freeing. + */ +void +_mesa_hash_table_destroy(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + if (delete_function) { + hash_table_foreach(ht, entry) { + delete_function(entry); + } + } + ralloc_free(ht); +} + +/** + * Deletes all entries of the given hash table without deleting the table + * itself or changing its structure. + * + * If delete_function is passed, it gets called on each entry present. + */ +void +_mesa_hash_table_clear(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + struct hash_entry *entry; + + for (entry = ht->table; entry != ht->table + ht->size; entry++) { + if (entry->key == NULL) + continue; + + if (delete_function != NULL && entry->key != ht->deleted_key) + delete_function(entry); + + entry->key = NULL; + } + + ht->entries = 0; + ht->deleted_entries = 0; +} + +/** Sets the value of the key pointer used for deleted entries in the table. + * + * The assumption is that usually keys are actual pointers, so we use a + * default value of a pointer to an arbitrary piece of storage in the library. + * But in some cases a consumer wants to store some other sort of value in the + * table, like a uint32_t, in which case that pointer may conflict with one of + * their valid keys. This lets that user select a safe value. + * + * This must be called before any keys are actually deleted from the table. + */ +void +_mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key) +{ + ht->deleted_key = deleted_key; +} + +static struct hash_entry * +hash_table_search(struct hash_table *ht, uint32_t hash, const void *key) +{ + assert(!key_pointer_is_reserved(ht, key)); + + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + + do { + struct hash_entry *entry = ht->table + hash_address; + + if (entry_is_free(entry)) { + return NULL; + } else if (entry_is_present(ht, entry) && entry->hash == hash) { + if (ht->key_equals_function(key, entry->key)) { + return entry; + } + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (hash_address != start_hash_address); + + return NULL; +} + +/** + * Finds a hash table entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. Note that the data pointer may be + * modified by the user. 
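+ *
+ * A minimal lookup sketch (the table, key, and value names here are
+ * hypothetical, added for illustration):
+ *
+ *    struct hash_entry *e = _mesa_hash_table_search(ht, "mykey");
+ *    if (e != NULL)
+ *       e->data = updated_value;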
+ */ +struct hash_entry * +_mesa_hash_table_search(struct hash_table *ht, const void *key) +{ + assert(ht->key_hash_function); + return hash_table_search(ht, ht->key_hash_function(key), key); +} + +struct hash_entry * +_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key) +{ + assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key)); + return hash_table_search(ht, hash, key); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data); + +static void +hash_table_insert_rehash(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + do { + struct hash_entry *entry = ht->table + hash_address; + + if (likely(entry->key == NULL)) { + entry->hash = hash; + entry->key = key; + entry->data = data; + return; + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (true); +} + +static void +_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index) +{ + struct hash_table old_ht; + struct hash_entry *table; + + if (new_size_index >= ARRAY_SIZE(hash_sizes)) + return; + + table = rzalloc_array(ralloc_parent(ht->table), struct hash_entry, + hash_sizes[new_size_index].size); + if (table == NULL) + return; + + old_ht = *ht; + + ht->table = table; + ht->size_index = new_size_index; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->entries = 0; + ht->deleted_entries = 0; + + hash_table_foreach(&old_ht, entry) { + hash_table_insert_rehash(ht, entry->hash, entry->key, entry->data); + } + + ht->entries = old_ht.entries; + + ralloc_free(old_ht.table); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + struct hash_entry *available_entry = NULL; + + assert(!key_pointer_is_reserved(ht, key)); + + if (ht->entries >= ht->max_entries) { + _mesa_hash_table_rehash(ht, ht->size_index + 1); + } else if (ht->deleted_entries + ht->entries >= ht->max_entries) { + _mesa_hash_table_rehash(ht, ht->size_index); + } + + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + do { + struct hash_entry *entry = ht->table + hash_address; + + if (!entry_is_present(ht, entry)) { + /* Stash the first available entry we find */ + if (available_entry == NULL) + available_entry = entry; + if (entry_is_free(entry)) + break; + } + + /* Implement replacement when another insert happens + * with a matching key. This is a relatively common + * feature of hash tables, with the alternative + * generally being "insert the new value as well, and + * return it first when the key is searched for". + * + * Note that the hash table doesn't have a delete + * callback. If freeing of old data pointers is + * required to avoid memory leaks, perform a search + * before inserting. 
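+       *
+       * For example (an illustrative sketch, not part of the original
+       * source), freeing a replaced value explicitly:
+       *
+       *    struct hash_entry *old = _mesa_hash_table_search(ht, key);
+       *    if (old)
+       *       free(old->data);
+       *    _mesa_hash_table_insert(ht, key, new_data);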
*/
+      if (!entry_is_deleted(ht, entry) &&
+          entry->hash == hash &&
+          ht->key_equals_function(key, entry->key)) {
+         entry->key = key;
+         entry->data = data;
+         return entry;
+      }
+
+      hash_address += double_hash;
+      if (hash_address >= size)
+         hash_address -= size;
+   } while (hash_address != start_hash_address);
+
+   if (available_entry) {
+      if (entry_is_deleted(ht, available_entry))
+         ht->deleted_entries--;
+      available_entry->hash = hash;
+      available_entry->key = key;
+      available_entry->data = data;
+      ht->entries++;
+      return available_entry;
+   }
+
+   /* We could hit here if a required resize failed. An unchecked-malloc
+    * application could ignore this result.
+    */
+   return NULL;
+}
+
+/**
+ * Inserts the key with the given hash into the table.
+ *
+ * Note that insertion may rearrange the table on a resize or rehash,
+ * so previously found hash_entries are no longer valid after this function.
+ */
+struct hash_entry *
+_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data)
+{
+   assert(ht->key_hash_function);
+   return hash_table_insert(ht, ht->key_hash_function(key), key, data);
+}
+
+struct hash_entry *
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data)
+{
+   assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key));
+   return hash_table_insert(ht, hash, key, data);
+}
+
+/**
+ * This function deletes the given hash table entry.
+ *
+ * Note that deletion doesn't otherwise modify the table, so an iteration over
+ * the table deleting entries is safe.
+ */
+void
+_mesa_hash_table_remove(struct hash_table *ht,
+                        struct hash_entry *entry)
+{
+   if (!entry)
+      return;
+
+   entry->key = ht->deleted_key;
+   ht->entries--;
+   ht->deleted_entries++;
+}
+
+/**
+ * Removes the entry with the corresponding key, if it exists.
+ */
+void _mesa_hash_table_remove_key(struct hash_table *ht,
+                                 const void *key)
+{
+   _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
+}
+
+/**
+ * This function is an iterator over the hash table.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop. Note that
+ * an iteration over the table is O(table_size) not O(entries).
+ */
+struct hash_entry *
+_mesa_hash_table_next_entry(struct hash_table *ht,
+                            struct hash_entry *entry)
+{
+   if (entry == NULL)
+      entry = ht->table;
+   else
+      entry = entry + 1;
+
+   for (; entry != ht->table + ht->size; entry++) {
+      if (entry_is_present(ht, entry)) {
+         return entry;
+      }
+   }
+
+   return NULL;
+}
+
+/**
+ * Returns a random entry from the hash table.
+ *
+ * This may be useful in implementing random replacement (as opposed
+ * to just removing everything) in caches based on this hash table
+ * implementation. @predicate may be used to filter entries, or may
+ * be set to NULL for no filtering.
*/
+struct hash_entry *
+_mesa_hash_table_random_entry(struct hash_table *ht,
+                              bool (*predicate)(struct hash_entry *entry))
+{
+   struct hash_entry *entry;
+   uint32_t i = rand() % ht->size;
+
+   if (ht->entries == 0)
+      return NULL;
+
+   for (entry = ht->table + i; entry != ht->table + ht->size; entry++) {
+      if (entry_is_present(ht, entry) &&
+          (!predicate || predicate(entry))) {
+         return entry;
+      }
+   }
+
+   for (entry = ht->table; entry != ht->table + i; entry++) {
+      if (entry_is_present(ht, entry) &&
+          (!predicate || predicate(entry))) {
+         return entry;
+      }
+   }
+
+   return NULL;
+}
+
+
+/**
+ * Quick FNV-1a hash implementation based on:
+ * http://www.isthe.com/chongo/tech/comp/fnv/
+ *
+ * FNV-1a is not the best hash out there -- Jenkins's lookup3 is supposed
+ * to be quite good, and it probably beats FNV. But FNV has the advantage
+ * that it involves almost no code. For an improvement on both, see Paul
+ * Hsieh's http://www.azillionmonkeys.com/qed/hash.html
+ */
+uint32_t
+_mesa_hash_data(const void *data, size_t size)
+{
+   return _mesa_fnv32_1a_accumulate_block(_mesa_fnv32_1a_offset_bias,
+                                          data, size);
+}
+
+/** FNV-1a string hash implementation */
+uint32_t
+_mesa_hash_string(const void *_key)
+{
+   uint32_t hash = _mesa_fnv32_1a_offset_bias;
+   const char *key = _key;
+
+   while (*key != 0) {
+      hash = _mesa_fnv32_1a_accumulate(hash, *key);
+      key++;
+   }
+
+   return hash;
+}
+
+/**
+ * String compare function for use as the comparison callback in
+ * _mesa_hash_table_create().
+ */
+bool
+_mesa_key_string_equal(const void *a, const void *b)
+{
+   return strcmp(a, b) == 0;
+}
+
+bool
+_mesa_key_pointer_equal(const void *a, const void *b)
+{
+   return a == b;
+}
+
+/**
+ * Helper to create a hash table with pointer keys.
+ */
+struct hash_table *
+_mesa_pointer_hash_table_create(void *mem_ctx)
+{
+   return _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                  _mesa_key_pointer_equal);
+}
+
+/**
+ * Hash table wrapper which supports 64-bit keys.
+ *
+ * TODO: unify all hash table implementations.
+ */
+
+struct hash_key_u64 {
+   uint64_t value;
+};
+
+static uint32_t
+key_u64_hash(const void *key)
+{
+   return _mesa_hash_data(key, sizeof(struct hash_key_u64));
+}
+
+static bool
+key_u64_equals(const void *a, const void *b)
+{
+   const struct hash_key_u64 *aa = a;
+   const struct hash_key_u64 *bb = b;
+
+   return aa->value == bb->value;
+}
+
+#define FREED_KEY_VALUE 0
+
+struct hash_table_u64 *
+_mesa_hash_table_u64_create(void *mem_ctx)
+{
+   STATIC_ASSERT(FREED_KEY_VALUE != DELETED_KEY_VALUE);
+   struct hash_table_u64 *ht;
+
+   ht = CALLOC_STRUCT(hash_table_u64);
+   if (!ht)
+      return NULL;
+
+   if (sizeof(void *) == 8) {
+      ht->table = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                          _mesa_key_pointer_equal);
+   } else {
+      ht->table = _mesa_hash_table_create(mem_ctx, key_u64_hash,
+                                          key_u64_equals);
+   }
+
+   if (ht->table)
+      _mesa_hash_table_set_deleted_key(ht->table, uint_key(DELETED_KEY_VALUE));
+
+   return ht;
+}
+
+void
+_mesa_hash_table_u64_clear(struct hash_table_u64 *ht,
+                           void (*delete_function)(struct hash_entry *entry))
+{
+   if (!ht)
+      return;
+
+   if (ht->deleted_key_data) {
+      if (delete_function) {
+         struct hash_table *table = ht->table;
+         struct hash_entry entry;
+
+         /* Create a fake entry for the delete function.
*/
+         if (sizeof(void *) == 8) {
+            entry.hash = table->key_hash_function(table->deleted_key);
+         } else {
+            struct hash_key_u64 _key = { .value = (uintptr_t)table->deleted_key };
+            entry.hash = table->key_hash_function(&_key);
+         }
+         entry.key = table->deleted_key;
+         entry.data = ht->deleted_key_data;
+
+         delete_function(&entry);
+      }
+      ht->deleted_key_data = NULL;
+   }
+
+   if (ht->freed_key_data) {
+      if (delete_function) {
+         struct hash_table *table = ht->table;
+         struct hash_entry entry;
+
+         /* Create a fake entry for the delete function. */
+         if (sizeof(void *) == 8) {
+            entry.hash = table->key_hash_function(uint_key(FREED_KEY_VALUE));
+         } else {
+            struct hash_key_u64 _key = { .value = (uintptr_t)FREED_KEY_VALUE };
+            entry.hash = table->key_hash_function(&_key);
+         }
+         entry.key = uint_key(FREED_KEY_VALUE);
+         entry.data = ht->freed_key_data;
+
+         delete_function(&entry);
+      }
+      ht->freed_key_data = NULL;
+   }
+
+   _mesa_hash_table_clear(ht->table, delete_function);
+}
+
+void
+_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht,
+                             void (*delete_function)(struct hash_entry *entry))
+{
+   if (!ht)
+      return;
+
+   _mesa_hash_table_u64_clear(ht, delete_function);
+   _mesa_hash_table_destroy(ht->table, delete_function);
+   free(ht);
+}
+
+void
+_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key,
+                            void *data)
+{
+   if (key == FREED_KEY_VALUE) {
+      ht->freed_key_data = data;
+      return;
+   }
+
+   if (key == DELETED_KEY_VALUE) {
+      ht->deleted_key_data = data;
+      return;
+   }
+
+   if (sizeof(void *) == 8) {
+      _mesa_hash_table_insert(ht->table, (void *)(uintptr_t)key, data);
+   } else {
+      struct hash_key_u64 *_key = CALLOC_STRUCT(hash_key_u64);
+
+      if (!_key)
+         return;
+      _key->value = key;
+
+      _mesa_hash_table_insert(ht->table, _key, data);
+   }
+}
+
+static struct hash_entry *
+hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key)
+{
+   if (sizeof(void *) == 8) {
+      return _mesa_hash_table_search(ht->table, (void *)(uintptr_t)key);
+   } else {
+      struct hash_key_u64 _key = { .value = key };
+      return _mesa_hash_table_search(ht->table, &_key);
+   }
+}
+
+void *
+_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key)
+{
+   struct hash_entry *entry;
+
+   if (key == FREED_KEY_VALUE)
+      return ht->freed_key_data;
+
+   if (key == DELETED_KEY_VALUE)
+      return ht->deleted_key_data;
+
+   entry = hash_table_u64_search(ht, key);
+   if (!entry)
+      return NULL;
+
+   return entry->data;
+}
+
+void
+_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key)
+{
+   struct hash_entry *entry;
+
+   if (key == FREED_KEY_VALUE) {
+      ht->freed_key_data = NULL;
+      return;
+   }
+
+   if (key == DELETED_KEY_VALUE) {
+      ht->deleted_key_data = NULL;
+      return;
+   }
+
+   entry = hash_table_u64_search(ht, key);
+   if (!entry)
+      return;
+
+   if (sizeof(void *) == 8) {
+      _mesa_hash_table_remove(ht->table, entry);
+   } else {
+      /* On 32-bit builds the key is a heap-allocated struct hash_key_u64,
+       * so free it after removing the entry. */
+      struct hash_key_u64 *_key = (struct hash_key_u64 *)entry->key;
+
+      _mesa_hash_table_remove(ht->table, entry);
+      free(_key);
+   }
+}
diff --git a/src/mesa/util/hash_table.h b/src/mesa/util/hash_table.h
new file mode 100644
index 00000000..8f1d6860
--- /dev/null
+++ b/src/mesa/util/hash_table.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to
permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#ifndef _HASH_TABLE_H
+#define _HASH_TABLE_H
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include "../c99_compat.h"
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hash_entry {
+   uint32_t hash;
+   const void *key;
+   void *data;
+};
+
+struct hash_table {
+   struct hash_entry *table;
+   uint32_t (*key_hash_function)(const void *key);
+   bool (*key_equals_function)(const void *a, const void *b);
+   const void *deleted_key;
+   uint32_t size;
+   uint32_t rehash;
+   uint64_t size_magic;
+   uint64_t rehash_magic;
+   uint32_t max_entries;
+   uint32_t size_index;
+   uint32_t entries;
+   uint32_t deleted_entries;
+};
+
+struct hash_table *
+_mesa_hash_table_create(void *mem_ctx,
+                        uint32_t (*key_hash_function)(const void *key),
+                        bool (*key_equals_function)(const void *a,
+                                                    const void *b));
+
+bool
+_mesa_hash_table_init(struct hash_table *ht,
+                      void *mem_ctx,
+                      uint32_t (*key_hash_function)(const void *key),
+                      bool (*key_equals_function)(const void *a,
+                                                  const void *b));
+
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx);
+void _mesa_hash_table_destroy(struct hash_table *ht,
+                              void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_clear(struct hash_table *ht,
+                            void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_set_deleted_key(struct hash_table *ht,
+                                      const void *deleted_key);
+
+static inline uint32_t _mesa_hash_table_num_entries(struct hash_table *ht)
+{
+   return ht->entries;
+}
+
+struct hash_entry *
+_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data);
+struct hash_entry *
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data);
+struct hash_entry *
+_mesa_hash_table_search(struct hash_table *ht, const void *key);
+struct hash_entry *
+_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key);
+void _mesa_hash_table_remove(struct hash_table *ht,
+                             struct hash_entry *entry);
+void _mesa_hash_table_remove_key(struct hash_table *ht,
+                                 const void *key);
+
+struct hash_entry *_mesa_hash_table_next_entry(struct hash_table *ht,
+                                               struct hash_entry *entry);
+struct hash_entry *
+_mesa_hash_table_random_entry(struct hash_table *ht,
+                              bool (*predicate)(struct hash_entry *entry));
+
+uint32_t _mesa_hash_data(const void *data, size_t size);
+uint32_t _mesa_hash_string(const void *key);
+bool _mesa_key_string_equal(const void *a, const void *b);
+bool _mesa_key_pointer_equal(const void *a, const void *b);
+
+static inline uint32_t _mesa_key_hash_string(const void *key)
+{
+   return _mesa_hash_string((const char *)key);
+}
+
+static inline uint32_t _mesa_hash_pointer(const void *pointer)
+{
+   uintptr_t num = (uintptr_t)
pointer; + return (uint32_t) ((num >> 2) ^ (num >> 6) ^ (num >> 10) ^ (num >> 14)); +} + +struct hash_table * +_mesa_pointer_hash_table_create(void *mem_ctx); + +enum { + _mesa_fnv32_1a_offset_bias = 2166136261u, +}; + +static inline uint32_t +_mesa_fnv32_1a_accumulate_block(uint32_t hash, const void *data, size_t size) +{ + const uint8_t *bytes = (const uint8_t *)data; + + while (size-- != 0) { + hash ^= *bytes; + hash = hash * 0x01000193; + bytes++; + } + + return hash; +} + +#define _mesa_fnv32_1a_accumulate(hash, expr) \ + _mesa_fnv32_1a_accumulate_block(hash, &(expr), sizeof(expr)) + +/** + * This foreach function is safe against deletion (which just replaces + * an entry's data with the deleted marker), but not against insertion + * (which may rehash the table, making entry a dangling pointer). + */ +#define hash_table_foreach(ht, entry) \ + for (struct hash_entry *entry = _mesa_hash_table_next_entry(ht, NULL); \ + entry != NULL; \ + entry = _mesa_hash_table_next_entry(ht, entry)) + +static inline void +hash_table_call_foreach(struct hash_table *ht, + void (*callback)(const void *key, + void *data, + void *closure), + void *closure) +{ + hash_table_foreach(ht, entry) + callback(entry->key, entry->data, closure); +} + +/** + * Hash table wrapper which supports 64-bit keys. + */ +struct hash_table_u64 { + struct hash_table *table; + void *freed_key_data; + void *deleted_key_data; +}; + +struct hash_table_u64 * +_mesa_hash_table_u64_create(void *mem_ctx); + +void +_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht, + void (*delete_function)(struct hash_entry *entry)); + +void +_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key, + void *data); + +void * +_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key); + +void +_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key); + +void +_mesa_hash_table_u64_clear(struct hash_table_u64 *ht, + void (*delete_function)(struct hash_entry *entry)); + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif /* _HASH_TABLE_H */ diff --git a/src/mesa/util/list.h b/src/mesa/util/list.h new file mode 100644 index 00000000..91b6cd46 --- /dev/null +++ b/src/mesa/util/list.h @@ -0,0 +1,249 @@ +/************************************************************************** + * + * Copyright 2006 VMware, Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * List macros heavily inspired by the Linux kernel
+ * list handling. No list looping yet.
+ *
+ * Is not threadsafe, so common operations need to
+ * be protected using an external mutex.
+ */
+
+#ifndef _UTIL_LIST_H_
+#define _UTIL_LIST_H_
+
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <assert.h>
+#include "../c99_compat.h"
+
+#ifdef DEBUG
+#  define list_assert(cond, msg)  assert(cond && msg)
+#else
+#  define list_assert(cond, msg)  (void)(0 && (cond))
+#endif
+
+struct list_head
+{
+   struct list_head *prev;
+   struct list_head *next;
+};
+
+static inline void list_inithead(struct list_head *item)
+{
+   item->prev = item;
+   item->next = item;
+}
+
+static inline void list_add(struct list_head *item, struct list_head *list)
+{
+   item->prev = list;
+   item->next = list->next;
+   list->next->prev = item;
+   list->next = item;
+}
+
+static inline void list_addtail(struct list_head *item, struct list_head *list)
+{
+   item->next = list;
+   item->prev = list->prev;
+   list->prev->next = item;
+   list->prev = item;
+}
+
+static inline bool list_is_empty(const struct list_head *list);
+
+static inline void list_replace(struct list_head *from, struct list_head *to)
+{
+   if (list_is_empty(from)) {
+      list_inithead(to);
+   } else {
+      to->prev = from->prev;
+      to->next = from->next;
+      from->next->prev = to;
+      from->prev->next = to;
+   }
+}
+
+static inline void list_del(struct list_head *item)
+{
+   item->prev->next = item->next;
+   item->next->prev = item->prev;
+   item->prev = item->next = NULL;
+}
+
+static inline void list_delinit(struct list_head *item)
+{
+   item->prev->next = item->next;
+   item->next->prev = item->prev;
+   item->next = item;
+   item->prev = item;
+}
+
+static inline bool list_is_empty(const struct list_head *list)
+{
+   return list->next == list;
+}
+
+/**
+ * Returns whether the list has exactly one element.
+ */
+static inline bool list_is_singular(const struct list_head *list)
+{
+   return list->next != NULL && list->next != list && list->next->next == list;
+}
+
+static inline unsigned list_length(const struct list_head *list)
+{
+   struct list_head *node;
+   unsigned length = 0;
+   for (node = list->next; node != list; node = node->next)
+      length++;
+   return length;
+}
+
+static inline void list_splice(struct list_head *src, struct list_head *dst)
+{
+   if (list_is_empty(src))
+      return;
+
+   src->next->prev = dst;
+   src->prev->next = dst->next;
+   dst->next->prev = src->prev;
+   dst->next = src->next;
+}
+
+static inline void list_splicetail(struct list_head *src, struct list_head *dst)
+{
+   if (list_is_empty(src))
+      return;
+
+   src->prev->next = dst;
+   src->next->prev = dst->prev;
+   dst->prev->next = src->next;
+   dst->prev = src->prev;
+}
+
+static inline void list_validate(const struct list_head *list)
+{
+   struct list_head *node;
+   assert(list->next->prev == list && list->prev->next == list);
+   for (node = list->next; node != list; node = node->next)
+      assert(node->next->prev == node && node->prev->next == node);
+}
+
+#define LIST_ENTRY(__type, __item, __field)   \
+   ((__type *)(((char *)(__item)) - offsetof(__type, __field)))
+
+/**
+ * Cast from a pointer to a member of a struct back to the containing struct.
+ *
+ * 'sample' MUST be initialized, or else the result is undefined!
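+ *
+ * For example (a hypothetical struct and list head, added for
+ * illustration only):
+ *
+ *    struct item { int value; struct list_head link; };
+ *    struct item *pos = NULL;
+ *    pos = container_of(head->next, pos, link);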
+ */ +#ifndef container_of +#define container_of(ptr, sample, member) \ + (void *)((char *)(ptr) \ + - ((char *)&(sample)->member - (char *)(sample))) +#endif + +#define list_first_entry(ptr, type, member) \ + LIST_ENTRY(type, (ptr)->next, member) + +#define list_last_entry(ptr, type, member) \ + LIST_ENTRY(type, (ptr)->prev, member) + + +#define LIST_FOR_EACH_ENTRY(pos, head, member) \ + for (pos = NULL, pos = container_of((head)->next, pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.next, pos, member)) + +#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ + for (pos = NULL, pos = container_of((head)->next, pos, member), \ + storage = container_of(pos->member.next, pos, member); \ + &pos->member != (head); \ + pos = storage, storage = container_of(storage->member.next, storage, member)) + +#define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \ + for (pos = NULL, pos = container_of((head)->prev, pos, member), \ + storage = container_of(pos->member.prev, pos, member); \ + &pos->member != (head); \ + pos = storage, storage = container_of(storage->member.prev, storage, member)) + +#define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \ + for (pos = NULL, pos = container_of((start), pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.next, pos, member)) + +#define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \ + for (pos = NULL, pos = container_of((start), pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.prev, pos, member)) + +#define list_for_each_entry(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->next, member), \ + *__next = LIST_ENTRY(type, pos->member.next, member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.next, member), \ + list_assert(pos == __next, "use _safe iterator"), \ + __next = LIST_ENTRY(type, __next->member.next, member)) + +#define list_for_each_entry_safe(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->next, member), \ + *__next = LIST_ENTRY(type, pos->member.next, member); \ + &pos->member != (head); \ + pos = __next, \ + __next = LIST_ENTRY(type, __next->member.next, member)) + +#define list_for_each_entry_rev(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->prev, member), \ + *__prev = LIST_ENTRY(type, pos->member.prev, member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.prev, member), \ + list_assert(pos == __prev, "use _safe iterator"), \ + __prev = LIST_ENTRY(type, __prev->member.prev, member)) + +#define list_for_each_entry_safe_rev(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->prev, member), \ + *__prev = LIST_ENTRY(type, pos->member.prev, member); \ + &pos->member != (head); \ + pos = __prev, \ + __prev = LIST_ENTRY(type, __prev->member.prev, member)) + +#define list_for_each_entry_from(type, pos, start, head, member) \ + for (type *pos = LIST_ENTRY(type, (start), member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.next, member)) + +#define list_for_each_entry_from_rev(type, pos, start, head, member) \ + for (type *pos = LIST_ENTRY(type, (start), member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.prev, member)) + +#endif /*_UTIL_LIST_H_*/ diff --git a/src/mesa/util/macros.h b/src/mesa/util/macros.h new file mode 100644 index 00000000..16c88dbb --- /dev/null +++ b/src/mesa/util/macros.h @@ -0,0 +1,335 @@ +/* + * Copyright © 2014 Intel Corporation + * + * 
Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef UTIL_MACROS_H
+#define UTIL_MACROS_H
+
+#include <assert.h>
+
+#include "../c99_compat.h"
+#include "../c11_compat.h"
+
+/* Compute the size of an array */
+#ifndef ARRAY_SIZE
+#  define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* For compatibility with Clang's __has_builtin() */
+#ifndef __has_builtin
+#  define __has_builtin(x) 0
+#endif
+
+/**
+ * __builtin_expect macros
+ */
+#if !defined(HAVE___BUILTIN_EXPECT)
+#  define __builtin_expect(x, y) (x)
+#endif
+
+#ifndef likely
+#  ifdef HAVE___BUILTIN_EXPECT
+#    define likely(x)   __builtin_expect(!!(x), 1)
+#    define unlikely(x) __builtin_expect(!!(x), 0)
+#  else
+#    define likely(x)   (x)
+#    define unlikely(x) (x)
+#  endif
+#endif
+
+
+/**
+ * Static (compile-time) assertion.
+ * Basically, use COND to dimension an array. If COND is false/zero the
+ * array size will be -1 and we'll get a compilation error.
+ */
+#define STATIC_ASSERT(COND) \
+   do { \
+      (void) sizeof(char [1 - 2*!(COND)]); \
+   } while (0)
+
+
+/**
+ * Unreachable macro. Useful for suppressing "control reaches end of non-void
+ * function" warnings.
+ */
+#if defined(HAVE___BUILTIN_UNREACHABLE) || __has_builtin(__builtin_unreachable)
+#define unreachable(str)    \
+do {                        \
+   assert(!str);            \
+   __builtin_unreachable(); \
+} while (0)
+#elif defined (_MSC_VER)
+#define unreachable(str)    \
+do {                        \
+   assert(!str);            \
+   __assume(0);             \
+} while (0)
+#else
+#define unreachable(str) assert(!str)
+#endif
+
+/**
+ * Assume macro. Useful for expressing our assumptions to the compiler,
+ * typically for purposes of silencing warnings.
+ */
+#if __has_builtin(__builtin_assume)
+#define assume(expr)       \
+do {                       \
+   assert(expr);           \
+   __builtin_assume(expr); \
+} while (0)
+#elif defined HAVE___BUILTIN_UNREACHABLE
+#define assume(expr) ((expr) ? ((void) 0) \
+                             : (assert(!"assumption failed"), \
+                                __builtin_unreachable()))
+#elif defined (_MSC_VER)
+#define assume(expr) __assume(expr)
+#else
+#define assume(expr) assert(expr)
+#endif
+
+/* Attribute const is used for functions that have no effects other than their
+ * return value, and only rely on the argument values to compute the return
+ * value. As a result, calls to it can be CSEed. Note that using memory
+ * pointed to by the arguments is not allowed for const functions.
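+ *
+ * For example, a pure-arithmetic helper could be declared as follows
+ * (an illustrative sketch, not part of the original header):
+ *
+ *    static int square(int x) ATTRIBUTE_CONST;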
+ */ +#ifdef HAVE_FUNC_ATTRIBUTE_CONST +#define ATTRIBUTE_CONST __attribute__((__const__)) +#else +#define ATTRIBUTE_CONST +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_FLATTEN +#define FLATTEN __attribute__((__flatten__)) +#else +#define FLATTEN +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT +#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) +#else +#define PRINTFLIKE(f, a) +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_MALLOC +#define MALLOCLIKE __attribute__((__malloc__)) +#else +#define MALLOCLIKE +#endif + +/* Forced function inlining */ +/* Note: Clang also sets __GNUC__ (see other cases below) */ +#ifndef ALWAYS_INLINE +# if defined(__GNUC__) +# define ALWAYS_INLINE inline __attribute__((always_inline)) +# elif defined(_MSC_VER) +# define ALWAYS_INLINE __forceinline +# else +# define ALWAYS_INLINE inline +# endif +#endif + +/* Used to optionally mark structures with misaligned elements or size as + * packed, to trade off performance for space. + */ +#ifdef HAVE_FUNC_ATTRIBUTE_PACKED +#define PACKED __attribute__((__packed__)) +#else +#define PACKED +#endif + +/* Attribute pure is used for functions that have no effects other than their + * return value. As a result, calls to it can be dead code eliminated. + */ +#ifdef HAVE_FUNC_ATTRIBUTE_PURE +#define ATTRIBUTE_PURE __attribute__((__pure__)) +#else +#define ATTRIBUTE_PURE +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL +#define ATTRIBUTE_RETURNS_NONNULL __attribute__((__returns_nonnull__)) +#else +#define ATTRIBUTE_RETURNS_NONNULL +#endif + +#ifndef NORETURN +# ifdef _MSC_VER +# define NORETURN __declspec(noreturn) +# elif defined HAVE_FUNC_ATTRIBUTE_NORETURN +# define NORETURN __attribute__((__noreturn__)) +# else +# define NORETURN +# endif +#endif + +#ifdef __cplusplus +/** + * Macro function that evaluates to true if T is a trivially + * destructible type -- that is, if its (non-virtual) destructor + * performs no action and all member variables and base classes are + * trivially destructible themselves. + */ +# if (defined(__clang__) && defined(__has_feature)) +# if __has_feature(has_trivial_destructor) +# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) +# endif +# elif defined(__GNUC__) +# if ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))) +# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) +# endif +# elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) +# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) +# endif +# ifndef HAS_TRIVIAL_DESTRUCTOR + /* It's always safe (if inefficient) to assume that a + * destructor is non-trivial. + */ +# define HAS_TRIVIAL_DESTRUCTOR(T) (false) +# endif +#endif + +/** + * PUBLIC/USED macros + * + * If we build the library with gcc's -fvisibility=hidden flag, we'll + * use the PUBLIC macro to mark functions that are to be exported. + * + * We also need to define a USED attribute, so the optimizer doesn't + * inline a static function that we later use in an alias. - ajax + */ +#ifndef PUBLIC +# if defined(__GNUC__) +# define PUBLIC __attribute__((visibility("default"))) +# define USED __attribute__((used)) +# elif defined(_MSC_VER) +# define PUBLIC __declspec(dllexport) +# define USED +# else +# define PUBLIC +# define USED +# endif +#endif + +/** + * UNUSED marks variables (or sometimes functions) that have to be defined, + * but are sometimes (or always) unused beyond that. A common case is for + * a function parameter to be used in some build configurations but not others. 
+ * Another case is fallback vfuncs that don't do anything with their params. + * + * Note that this should not be used for identifiers used in `assert()`; + * see ASSERTED below. + */ +#ifdef HAVE_FUNC_ATTRIBUTE_UNUSED +#define UNUSED __attribute__((unused)) +#else +#define UNUSED +#endif + +/** + * Use ASSERTED to indicate that an identifier is unused outside of an `assert()`, + * so that assert-free builds don't get "unused variable" warnings. + */ +#ifdef NDEBUG +#define ASSERTED UNUSED +#else +#define ASSERTED +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT +#define MUST_CHECK __attribute__((warn_unused_result)) +#else +#define MUST_CHECK +#endif + +#if defined(__GNUC__) +#define ATTRIBUTE_NOINLINE __attribute__((noinline)) +#else +#define ATTRIBUTE_NOINLINE +#endif + + +/** + * Check that STRUCT::FIELD can hold MAXVAL. We use a lot of bitfields + * in Mesa/gallium. We have to be sure they're of sufficient size to + * hold the largest expected value. + * Note that with MSVC, enums are signed and enum bitfields need one extra + * high bit (always zero) to ensure the max value is handled correctly. + * This macro will detect that with MSVC, but not GCC. + */ +#define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \ + do { \ + ASSERTED STRUCT s; \ + s.FIELD = (MAXVAL); \ + assert((int) s.FIELD == (MAXVAL) && "Insufficient bitfield size!"); \ + } while (0) + + +/** Compute ceiling of integer quotient of A divided by B. */ +#define DIV_ROUND_UP( A, B ) ( ((A) + (B) - 1) / (B) ) + +/** Clamp X to [MIN,MAX]. Turn NaN into MIN, arbitrarily. */ +#define CLAMP( X, MIN, MAX ) ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) ) + +/** Minimum of two values: */ +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) + +/** Maximum of two values: */ +#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) + +/** Minimum and maximum of three values: */ +#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) +#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) + +/** Align a value to a power of two */ +#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1)) + +/** + * Macro for declaring an explicit conversion operator. Defaults to an + * implicit conversion if C++11 is not supported. + */ +#if __cplusplus >= 201103L +#define EXPLICIT_CONVERSION explicit +#elif defined(__cplusplus) +#define EXPLICIT_CONVERSION +#endif + +/** Set a single bit */ +#define BITFIELD_BIT(b) (1u << (b)) +/** Set all bits up to excluding bit b */ +#define BITFIELD_MASK(b) \ + ((b) == 32 ? (~0u) : BITFIELD_BIT((b) % 32) - 1) +/** Set count bits starting from bit b */ +#define BITFIELD_RANGE(b, count) \ + (BITFIELD_MASK((b) + (count)) & ~BITFIELD_MASK(b)) + +/** Set a single bit */ +#define BITFIELD64_BIT(b) (1ull << (b)) +/** Set all bits up to excluding bit b */ +#define BITFIELD64_MASK(b) \ + ((b) == 64 ? 
(~0ull) : BITFIELD64_BIT(b) - 1)
+/** Set count bits starting from bit b  */
+#define BITFIELD64_RANGE(b, count) \
+   (BITFIELD64_MASK((b) + (count)) & ~BITFIELD64_MASK(b))
+
+#endif /* UTIL_MACROS_H */
diff --git a/src/mesa/util/os_socket.c b/src/mesa/util/os_socket.c
new file mode 100644
index 00000000..98ef0132
--- /dev/null
+++ b/src/mesa/util/os_socket.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2019 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <errno.h>
+
+#include "os_socket.h"
+
+#if defined(__linux__)
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+int
+os_socket_listen_abstract(const char *path, int count)
+{
+   int s = socket(AF_UNIX, SOCK_STREAM, 0);
+   if (s < 0)
+      return -1;
+
+   struct sockaddr_un addr;
+   memset(&addr, 0, sizeof(addr));
+   addr.sun_family = AF_UNIX;
+   strncpy(addr.sun_path + 1, path, sizeof(addr.sun_path) - 2);
+
+   /* Create an abstract socket */
+   int ret = bind(s, (struct sockaddr*)&addr,
+                  offsetof(struct sockaddr_un, sun_path) +
+                  strlen(path) + 1);
+   if (ret < 0) {
+      /* Don't leak the descriptor on a failed bind. */
+      close(s);
+      return -1;
+   }
+
+   listen(s, count);
+
+   return s;
+}
+
+int
+os_socket_accept(int s)
+{
+   return accept(s, NULL, NULL);
+}
+
+ssize_t
+os_socket_recv(int socket, void *buffer, size_t length, int flags)
+{
+   return recv(socket, buffer, length, flags);
+}
+
+ssize_t
+os_socket_send(int socket, const void *buffer, size_t length, int flags)
+{
+   return send(socket, buffer, length, flags);
+}
+
+void
+os_socket_block(int s, bool block)
+{
+   int old = fcntl(s, F_GETFL, 0);
+   if (old == -1)
+      return;
+
+   if (block)
+      fcntl(s, F_SETFL, old & ~O_NONBLOCK);
+   else
+      fcntl(s, F_SETFL, old | O_NONBLOCK);
+}
+
+void
+os_socket_close(int s)
+{
+   close(s);
+}
+
+#else
+
+int
+os_socket_listen_abstract(const char *path, int count)
+{
+   errno = ENOSYS;
+   return -1;
+}
+
+int
+os_socket_accept(int s)
+{
+   errno = ENOSYS;
+   return -1;
+}
+
+ssize_t
+os_socket_recv(int socket, void *buffer, size_t length, int flags)
+{
+   errno = ENOSYS;
+   return -1;
+}
+
+ssize_t
+os_socket_send(int socket, const void *buffer, size_t length, int flags)
+{
+   errno = ENOSYS;
+   return -1;
+}
+
+void
+os_socket_block(int s, bool block)
+{
+}
+
+void
+os_socket_close(int s)
+{
+}
+
+#endif
diff --git a/src/mesa/util/os_socket.h b/src/mesa/util/os_socket.h
new file mode 100644
index 00000000..0d6f8749
--- /dev/null
+++ b/src/mesa/util/os_socket.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ *
+ * Socket operations helpers
+ */
+
+#ifndef _OS_SOCKET_H_
+#define _OS_SOCKET_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#ifdef _MSC_VER
+#include <BaseTsd.h>
+typedef SSIZE_T ssize_t;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int os_socket_accept(int s);
+
+int os_socket_listen_abstract(const char *path, int count);
+
+ssize_t os_socket_recv(int socket, void *buffer, size_t length, int flags);
+ssize_t os_socket_send(int socket, const void *buffer, size_t length, int flags);
+
+void os_socket_block(int s, bool block);
+void os_socket_close(int s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _OS_SOCKET_H_ */
diff --git a/src/mesa/util/os_time.c b/src/mesa/util/os_time.c
new file mode 100644
index 00000000..969ce5ca
--- /dev/null
+++ b/src/mesa/util/os_time.c
@@ -0,0 +1,195 @@
+/**************************************************************************
+ *
+ * Copyright 2008-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * OS independent time-manipulation functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "os_time.h"
+#include "detect_os.h"
+
+#if defined(USE_GCC_ATOMIC_BUILTINS)
+/* The builtins with explicit memory model are available since GCC 4.7. */
+#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE)
+#else
+#define p_atomic_read(_v) (*(_v))
+#endif
+
+#if DETECT_OS_UNIX
+#  include <unistd.h> /* usleep */
+#  include <time.h> /* timeval */
+#  include <sys/time.h> /* timeval */
+#  include <sched.h> /* sched_yield */
+#  include <errno.h>
+#elif DETECT_OS_WINDOWS
+#  include <windows.h>
+#else
+#  error Unsupported OS
+#endif
+
+
+int64_t
+os_time_get_nano(void)
+{
+#if DETECT_OS_LINUX || DETECT_OS_BSD
+
+   struct timespec tv;
+   clock_gettime(CLOCK_MONOTONIC, &tv);
+   return tv.tv_nsec + tv.tv_sec*INT64_C(1000000000);
+
+#elif DETECT_OS_UNIX
+
+   struct timeval tv;
+   gettimeofday(&tv, NULL);
+   return tv.tv_usec*INT64_C(1000) + tv.tv_sec*INT64_C(1000000000);
+
+#elif DETECT_OS_WINDOWS
+
+   static LARGE_INTEGER frequency;
+   LARGE_INTEGER counter;
+   int64_t secs, nanosecs;
+   if(!frequency.QuadPart)
+      QueryPerformanceFrequency(&frequency);
+   QueryPerformanceCounter(&counter);
+   /* Compute seconds and nanoseconds parts separately to
+    * reduce severity of precision loss.
+    */
+   secs = counter.QuadPart / frequency.QuadPart;
+   nanosecs = (counter.QuadPart % frequency.QuadPart) * INT64_C(1000000000)
+              / frequency.QuadPart;
+   return secs*INT64_C(1000000000) + nanosecs;
+
+#else
+
+#error Unsupported OS
+
+#endif
+}
+
+
+
+void
+os_time_sleep(int64_t usecs)
+{
+#if DETECT_OS_LINUX
+   struct timespec time;
+   time.tv_sec = usecs / 1000000;
+   time.tv_nsec = (usecs % 1000000) * 1000;
+   while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR);
+
+#elif DETECT_OS_UNIX
+   usleep(usecs);
+
+#elif DETECT_OS_WINDOWS
+   DWORD dwMilliseconds = (DWORD) ((usecs + 999) / 1000);
+   /* Avoid Sleep(0) as that would sleep for an undetermined duration */
+   if (dwMilliseconds) {
+      Sleep(dwMilliseconds);
+   }
+#else
+#  error Unsupported OS
+#endif
+}
+
+
+
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout)
+{
+   int64_t time, abs_timeout;
+
+   /* Also check for the type upper bound.
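+    * A uint64_t timeout above INT64_MAX cannot be added to the current
+    * time without overflowing the signed result, so it is treated as
+    * infinite as well.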
*/
+   if (timeout == OS_TIMEOUT_INFINITE || timeout > INT64_MAX)
+      return OS_TIMEOUT_INFINITE;
+
+   time = os_time_get_nano();
+   abs_timeout = time + (int64_t)timeout;
+
+   /* Check for overflow. */
+   if (abs_timeout < time)
+      return OS_TIMEOUT_INFINITE;
+
+   return abs_timeout;
+}
+
+
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout)
+{
+   if (!p_atomic_read(var))
+      return true;
+
+   if (!timeout)
+      return false;
+
+   if (timeout == OS_TIMEOUT_INFINITE) {
+      while (p_atomic_read(var)) {
+#if DETECT_OS_UNIX
+         sched_yield();
+#endif
+      }
+      return true;
+   }
+   else {
+      int64_t start_time = os_time_get_nano();
+      int64_t end_time = start_time + timeout;
+
+      while (p_atomic_read(var)) {
+         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
+            return false;
+
+#if DETECT_OS_UNIX
+         sched_yield();
+#endif
+      }
+      return true;
+   }
+}
+
+
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout)
+{
+   if (!p_atomic_read(var))
+      return true;
+
+   if (timeout == OS_TIMEOUT_INFINITE)
+      return os_wait_until_zero(var, OS_TIMEOUT_INFINITE);
+
+   while (p_atomic_read(var)) {
+      if (os_time_get_nano() >= timeout)
+         return false;
+
+#if DETECT_OS_UNIX
+      sched_yield();
+#endif
+   }
+   return true;
+}
diff --git a/src/mesa/util/os_time.h b/src/mesa/util/os_time.h
new file mode 100644
index 00000000..049ab118
--- /dev/null
+++ b/src/mesa/util/os_time.h
@@ -0,0 +1,130 @@
+/**************************************************************************
+ *
+ * Copyright 2008-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * OS independent time-manipulation functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef _OS_TIME_H_
+#define _OS_TIME_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+/* must be equal to PIPE_TIMEOUT_INFINITE */
+#define OS_TIMEOUT_INFINITE 0xffffffffffffffffull
+
+/*
+ * Get the current time in nanoseconds from an unknown base.
+ */
+int64_t
+os_time_get_nano(void);
+
+
+/*
+ * Get the current time in microseconds from an unknown base.
+ */
+static inline int64_t
+os_time_get(void)
+{
+   return os_time_get_nano() / 1000;
+}
+
+
+/*
+ * Sleep.
+ */
+void
+os_time_sleep(int64_t usecs);
+
+
+/*
+ * Helper function for detecting timeouts, taking overflow into account.
+ *
+ * Returns true if the current time has elapsed beyond the specified interval.
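+ *
+ * For example, with start=10 and end=20: curr=15 is still inside the
+ * window (returns false), while curr=25 has timed out (returns true).
+ * When the interval wraps around (start > end), the in-window test is
+ * inverted accordingly.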
*/
+static inline bool
+os_time_timeout(int64_t start,
+                int64_t end,
+                int64_t curr)
+{
+   if (start <= end)
+      return !(start <= curr && curr < end);
+   else
+      return !((start <= curr) || (curr < end));
+}
+
+
+/**
+ * Convert a relative timeout in nanoseconds into an absolute timeout,
+ * in other words, it returns current time + timeout.
+ * os_time_get_nano() must be monotonic.
+ * OS_TIMEOUT_INFINITE is passed through unchanged. If the calculation
+ * overflows, OS_TIMEOUT_INFINITE is returned.
+ */
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ *
+ * \param var        variable
+ * \param timeout    timeout in ns, can be anything from 0 (no wait) to
+ *                   OS_TIMEOUT_INFINITE (wait forever)
+ * \return true if the variable is zero
+ */
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ * The timeout is the absolute time when the waiting should stop. If it is
+ * less than or equal to the current time, it only returns the status and
+ * doesn't wait. OS_TIMEOUT_INFINITE waits forever. This requires that
+ * os_time_get_nano is monotonic.
+ *
+ * \param var        variable
+ * \param timeout    the time in ns when the waiting should stop
+ * \return true if the variable is zero
+ */
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _OS_TIME_H_ */
diff --git a/src/mesa/util/ralloc.c b/src/mesa/util/ralloc.c
new file mode 100644
index 00000000..0d20223d
--- /dev/null
+++ b/src/mesa/util/ralloc.c
@@ -0,0 +1,920 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+/* Some versions of MinGW are missing _vscprintf's declaration, although they
+ * still provide the symbol in the import library.
*/
+#ifdef __MINGW32__
+_CRTIMP int _vscprintf(const char *format, va_list argptr);
+#endif
+
+#include "ralloc.h"
+
+#ifndef va_copy
+#ifdef __va_copy
+#define va_copy(dest, src) __va_copy((dest), (src))
+#else
+#define va_copy(dest, src) (dest) = (src)
+#endif
+#endif
+
+#define CANARY 0x5A1106
+
+/* Align the header's size so that ralloc() allocations will return with the
+ * same alignment as a libc malloc would have (8 on 32-bit GLIBC, 16 on
+ * 64-bit), avoiding performance penalties on x86 and alignment faults on
+ * ARM.
+ */
+struct
+#ifdef _MSC_VER
+ __declspec(align(8))
+#elif defined(__LP64__)
+ __attribute__((aligned(16)))
+#else
+ __attribute__((aligned(8)))
+#endif
+   ralloc_header
+{
+#ifndef NDEBUG
+   /* A canary value used to determine whether a pointer is ralloc'd. */
+   unsigned canary;
+#endif
+
+   struct ralloc_header *parent;
+
+   /* The first child (head of a linked list) */
+   struct ralloc_header *child;
+
+   /* Linked list of siblings */
+   struct ralloc_header *prev;
+   struct ralloc_header *next;
+
+   void (*destructor)(void *);
+};
+
+typedef struct ralloc_header ralloc_header;
+
+static void unlink_block(ralloc_header *info);
+static void unsafe_free(ralloc_header *info);
+
+static ralloc_header *
+get_header(const void *ptr)
+{
+   ralloc_header *info = (ralloc_header *) (((char *) ptr) -
+                                            sizeof(ralloc_header));
+   assert(info->canary == CANARY);
+   return info;
+}
+
+#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header))
+
+static void
+add_child(ralloc_header *parent, ralloc_header *info)
+{
+   if (parent != NULL) {
+      info->parent = parent;
+      info->next = parent->child;
+      parent->child = info;
+
+      if (info->next != NULL)
+         info->next->prev = info;
+   }
+}
+
+void *
+ralloc_context(const void *ctx)
+{
+   return ralloc_size(ctx, 0);
+}
+
+void *
+ralloc_size(const void *ctx, size_t size)
+{
+   void *block = malloc(size + sizeof(ralloc_header));
+   ralloc_header *info;
+   ralloc_header *parent;
+
+   if (unlikely(block == NULL))
+      return NULL;
+
+   info = (ralloc_header *) block;
+   /* measurements have shown that calloc is slower (because of
+    * the multiplication overflow checking?), so clear things
+    * manually
+    */
+   info->parent = NULL;
+   info->child = NULL;
+   info->prev = NULL;
+   info->next = NULL;
+   info->destructor = NULL;
+
+   parent = ctx != NULL ? get_header(ctx) : NULL;
+
+   add_child(parent, info);
+
+#ifndef NDEBUG
+   info->canary = CANARY;
+#endif
+
+   return PTR_FROM_HEADER(info);
+}
+
+void *
+rzalloc_size(const void *ctx, size_t size)
+{
+   void *ptr = ralloc_size(ctx, size);
+
+   if (likely(ptr))
+      memset(ptr, 0, size);
+
+   return ptr;
+}
+
+/* helper function - assumes ptr != NULL */
+static void *
+resize(void *ptr, size_t size)
+{
+   ralloc_header *child, *old, *info;
+
+   old = get_header(ptr);
+   info = realloc(old, size + sizeof(ralloc_header));
+
+   if (info == NULL)
+      return NULL;
+
+   /* Update parent and sibling's links to the reallocated node.
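+    * realloc() may have moved the header, so every pointer that referred
+    * to the old address (the parent's child link and both siblings) must
+    * be repointed at the new one.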
*/ + if (info != old && info->parent != NULL) { + if (info->parent->child == old) + info->parent->child = info; + + if (info->prev != NULL) + info->prev->next = info; + + if (info->next != NULL) + info->next->prev = info; + } + + /* Update child->parent links for all children */ + for (child = info->child; child != NULL; child = child->next) + child->parent = info; + + return PTR_FROM_HEADER(info); +} + +void * +reralloc_size(const void *ctx, void *ptr, size_t size) +{ + if (unlikely(ptr == NULL)) + return ralloc_size(ctx, size); + + assert(ralloc_parent(ptr) == ctx); + return resize(ptr, size); +} + +void * +rerzalloc_size(const void *ctx, void *ptr, size_t old_size, size_t new_size) +{ + if (unlikely(ptr == NULL)) + return rzalloc_size(ctx, new_size); + + assert(ralloc_parent(ptr) == ctx); + ptr = resize(ptr, new_size); + + if (new_size > old_size) + memset((char *)ptr + old_size, 0, new_size - old_size); + + return ptr; +} + +void * +ralloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return ralloc_size(ctx, size * count); +} + +void * +rzalloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return rzalloc_size(ctx, size * count); +} + +void * +reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return reralloc_size(ctx, ptr, size * count); +} + +void * +rerzalloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned old_count, unsigned new_count) +{ + if (new_count > SIZE_MAX/size) + return NULL; + + return rerzalloc_size(ctx, ptr, size * old_count, size * new_count); +} + +void +ralloc_free(void *ptr) +{ + ralloc_header *info; + + if (ptr == NULL) + return; + + info = get_header(ptr); + unlink_block(info); + unsafe_free(info); +} + +static void +unlink_block(ralloc_header *info) +{ + /* Unlink from parent & siblings */ + if (info->parent != NULL) { + if (info->parent->child == info) + info->parent->child = info->next; + + if (info->prev != NULL) + info->prev->next = info->next; + + if (info->next != NULL) + info->next->prev = info->prev; + } + info->parent = NULL; + info->prev = NULL; + info->next = NULL; +} + +static void +unsafe_free(ralloc_header *info) +{ + /* Recursively free any children...don't waste time unlinking them. */ + ralloc_header *temp; + while (info->child != NULL) { + temp = info->child; + info->child = temp->next; + unsafe_free(temp); + } + + /* Free the block itself. Call the destructor first, if any. */ + if (info->destructor != NULL) + info->destructor(PTR_FROM_HEADER(info)); + + free(info); +} + +void +ralloc_steal(const void *new_ctx, void *ptr) +{ + ralloc_header *info, *parent; + + if (unlikely(ptr == NULL)) + return; + + info = get_header(ptr); + parent = new_ctx ? get_header(new_ctx) : NULL; + + unlink_block(info); + + add_child(parent, info); +} + +void +ralloc_adopt(const void *new_ctx, void *old_ctx) +{ + ralloc_header *new_info, *old_info, *child; + + if (unlikely(old_ctx == NULL)) + return; + + old_info = get_header(old_ctx); + new_info = get_header(new_ctx); + + /* If there are no children, bail. */ + if (unlikely(old_info->child == NULL)) + return; + + /* Set all the children's parent to new_ctx; get a pointer to the last child. 
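+    * (The loop condition stops before the final child, so `child` is left
+    * pointing at the list tail; its parent is assigned right after the loop
+    * and the tail is then used to splice the two sibling lists together.)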
*/ + for (child = old_info->child; child->next != NULL; child = child->next) { + child->parent = new_info; + } + child->parent = new_info; + + /* Connect the two lists together; parent them to new_ctx; make old_ctx empty. */ + child->next = new_info->child; + if (child->next) + child->next->prev = child; + new_info->child = old_info->child; + old_info->child = NULL; +} + +void * +ralloc_parent(const void *ptr) +{ + ralloc_header *info; + + if (unlikely(ptr == NULL)) + return NULL; + + info = get_header(ptr); + return info->parent ? PTR_FROM_HEADER(info->parent) : NULL; +} + +void +ralloc_set_destructor(const void *ptr, void(*destructor)(void *)) +{ + ralloc_header *info = get_header(ptr); + info->destructor = destructor; +} + +char * +ralloc_strdup(const void *ctx, const char *str) +{ + size_t n; + char *ptr; + + if (unlikely(str == NULL)) + return NULL; + + n = strlen(str); + ptr = ralloc_array(ctx, char, n + 1); + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +char * +ralloc_strndup(const void *ctx, const char *str, size_t max) +{ + size_t n; + char *ptr; + + if (unlikely(str == NULL)) + return NULL; + + n = strnlen(str, max); + ptr = ralloc_array(ctx, char, n + 1); + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +/* helper routine for strcat/strncat - n is the exact amount to copy */ +static bool +cat(char **dest, const char *str, size_t n) +{ + char *both; + size_t existing_length; + assert(dest != NULL && *dest != NULL); + + existing_length = strlen(*dest); + both = resize(*dest, existing_length + n + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, n); + both[existing_length + n] = '\0'; + + *dest = both; + return true; +} + + +bool +ralloc_strcat(char **dest, const char *str) +{ + return cat(dest, str, strlen(str)); +} + +bool +ralloc_strncat(char **dest, const char *str, size_t n) +{ + return cat(dest, str, strnlen(str, n)); +} + +bool +ralloc_str_append(char **dest, const char *str, + size_t existing_length, size_t str_size) +{ + char *both; + assert(dest != NULL && *dest != NULL); + + both = resize(*dest, existing_length + str_size + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, str_size); + both[existing_length + str_size] = '\0'; + + *dest = both; + + return true; +} + +char * +ralloc_asprintf(const void *ctx, const char *fmt, ...) +{ + char *ptr; + va_list args; + va_start(args, fmt); + ptr = ralloc_vasprintf(ctx, fmt, args); + va_end(args); + return ptr; +} + +/* Return the length of the string that would be generated by a printf-style + * format and argument list, not including the \0 byte. + */ +static size_t +printf_length(const char *fmt, va_list untouched_args) +{ + int size; + char junk; + + /* Make a copy of the va_list so the original caller can still use it */ + va_list args; + va_copy(args, untouched_args); + +#ifdef _WIN32 + /* We need to use _vcsprintf to calculate the size as vsnprintf returns -1 + * if the number of characters to write is greater than count. + */ + size = _vscprintf(fmt, args); + (void)junk; +#else + size = vsnprintf(&junk, 1, fmt, args); +#endif + assert(size >= 0); + + va_end(args); + + return size; +} + +char * +ralloc_vasprintf(const void *ctx, const char *fmt, va_list args) +{ + size_t size = printf_length(fmt, args) + 1; + + char *ptr = ralloc_size(ctx, size); + if (ptr != NULL) + vsnprintf(ptr, size, fmt, args); + + return ptr; +} + +bool +ralloc_asprintf_append(char **str, const char *fmt, ...) 
+{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_append(str, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_append(char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return ralloc_vasprintf_rewrite_tail(str, &existing_length, fmt, args); +} + +bool +ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_rewrite_tail(str, start, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, + va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + // Assuming a NULL context is probably bad, but it's expected behavior. + *str = ralloc_vasprintf(NULL, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = resize(*str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/*************************************************************************** + * Linear allocator for short-lived allocations. + *************************************************************************** + * + * The allocator consists of a parent node (2K buffer), which requires + * a ralloc parent, and child nodes (allocations). Child nodes can't be freed + * directly, because the parent doesn't track them. You have to release + * the parent node in order to release all its children. + * + * The allocator uses a fixed-sized buffer with a monotonically increasing + * offset after each allocation. If the buffer is all used, another buffer + * is allocated, sharing the same ralloc parent, so all buffers are at + * the same level in the ralloc hierarchy. + * + * The linear parent node is always the first buffer and keeps track of all + * other buffers. + */ + +#define MIN_LINEAR_BUFSIZE 2048 +#define SUBALLOC_ALIGNMENT 8 +#define LMAGIC 0x87b9c7d3 + +struct +#ifdef _MSC_VER + __declspec(align(8)) +#elif defined(__LP64__) + __attribute__((aligned(16))) +#else + __attribute__((aligned(8))) +#endif + linear_header { +#ifndef NDEBUG + unsigned magic; /* for debugging */ +#endif + unsigned offset; /* points to the first unused byte in the buffer */ + unsigned size; /* size of the buffer */ + void *ralloc_parent; /* new buffers will use this */ + struct linear_header *next; /* next buffer if we have more */ + struct linear_header *latest; /* the only buffer that has free space */ + + /* After this structure, the buffer begins. + * Each suballocation consists of linear_size_chunk as its header followed + * by the suballocation, so it goes: + * + * - linear_size_chunk + * - allocated space + * - linear_size_chunk + * - allocated space + * etc. + * + * linear_size_chunk is only needed by linear_realloc. + */ +}; + +struct linear_size_chunk { + unsigned size; /* for realloc */ + unsigned _padding; +}; + +typedef struct linear_header linear_header; +typedef struct linear_size_chunk linear_size_chunk; + +#define LINEAR_PARENT_TO_HEADER(parent) \ + (linear_header*) \ + ((char*)(parent) - sizeof(linear_size_chunk) - sizeof(linear_header)) + +/* Allocate the linear buffer with its header. 
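+ * The requested size is rounded up to at least MIN_LINEAR_BUFSIZE so that
+ * subsequent small suballocations can be carved out of the same buffer
+ * instead of each taking a separate ralloc allocation.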
*/ +static linear_header * +create_linear_node(void *ralloc_ctx, unsigned min_size) +{ + linear_header *node; + + min_size += sizeof(linear_size_chunk); + + if (likely(min_size < MIN_LINEAR_BUFSIZE)) + min_size = MIN_LINEAR_BUFSIZE; + + node = ralloc_size(ralloc_ctx, sizeof(linear_header) + min_size); + if (unlikely(!node)) + return NULL; + +#ifndef NDEBUG + node->magic = LMAGIC; +#endif + node->offset = 0; + node->size = min_size; + node->ralloc_parent = ralloc_ctx; + node->next = NULL; + node->latest = node; + return node; +} + +void * +linear_alloc_child(void *parent, unsigned size) +{ + linear_header *first = LINEAR_PARENT_TO_HEADER(parent); + linear_header *latest = first->latest; + linear_header *new_node; + linear_size_chunk *ptr; + unsigned full_size; + + assert(first->magic == LMAGIC); + assert(!latest->next); + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + full_size = sizeof(linear_size_chunk) + size; + + if (unlikely(latest->offset + full_size > latest->size)) { + /* allocate a new node */ + new_node = create_linear_node(latest->ralloc_parent, size); + if (unlikely(!new_node)) + return NULL; + + first->latest = new_node; + latest->latest = new_node; + latest->next = new_node; + latest = new_node; + } + + ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset); + ptr->size = size; + latest->offset += full_size; + + assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0); + return &ptr[1]; +} + +void * +linear_alloc_parent(void *ralloc_ctx, unsigned size) +{ + linear_header *node; + + if (unlikely(!ralloc_ctx)) + return NULL; + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + + node = create_linear_node(ralloc_ctx, size); + if (unlikely(!node)) + return NULL; + + return linear_alloc_child((char*)node + + sizeof(linear_header) + + sizeof(linear_size_chunk), size); +} + +void * +linear_zalloc_child(void *parent, unsigned size) +{ + void *ptr = linear_alloc_child(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void * +linear_zalloc_parent(void *parent, unsigned size) +{ + void *ptr = linear_alloc_parent(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void +linear_free_parent(void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + void *ptr = node; + + node = node->next; + ralloc_free(ptr); + } +} + +void +ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + ralloc_steal(new_ralloc_ctx, node); + node->ralloc_parent = new_ralloc_ctx; + node = node->next; + } +} + +void * +ralloc_parent_of_linear_parent(void *ptr) +{ + linear_header *node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + return node->ralloc_parent; +} + +void * +linear_realloc(void *parent, void *old, unsigned new_size) +{ + unsigned old_size = 0; + ralloc_header *new_ptr; + + new_ptr = linear_alloc_child(parent, new_size); + + if (unlikely(!old)) + return new_ptr; + + old_size = ((linear_size_chunk*)old)[-1].size; + + if (likely(new_ptr && old_size)) + memcpy(new_ptr, old, MIN2(old_size, new_size)); + + return new_ptr; +} + +/* All code below is pretty much copied from ralloc and only the alloc + * calls are different. 
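+ * The linear parent handle takes the place of the ralloc context, so the
+ * resulting strings live in the linear buffer and are all released together
+ * by linear_free_parent().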
+ */ + +char * +linear_strdup(void *parent, const char *str) +{ + unsigned n; + char *ptr; + + if (unlikely(!str)) + return NULL; + + n = strlen(str); + ptr = linear_alloc_child(parent, n + 1); + if (unlikely(!ptr)) + return NULL; + + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +char * +linear_asprintf(void *parent, const char *fmt, ...) +{ + char *ptr; + va_list args; + va_start(args, fmt); + ptr = linear_vasprintf(parent, fmt, args); + va_end(args); + return ptr; +} + +char * +linear_vasprintf(void *parent, const char *fmt, va_list args) +{ + unsigned size = printf_length(fmt, args) + 1; + + char *ptr = linear_alloc_child(parent, size); + if (ptr != NULL) + vsnprintf(ptr, size, fmt, args); + + return ptr; +} + +bool +linear_asprintf_append(void *parent, char **str, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_append(parent, str, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_append(void *parent, char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return linear_vasprintf_rewrite_tail(parent, str, &existing_length, fmt, args); +} + +bool +linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_rewrite_tail(parent, str, start, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + *str = linear_vasprintf(parent, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = linear_realloc(parent, *str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/* helper routine for strcat/strncat - n is the exact amount to copy */ +static bool +linear_cat(void *parent, char **dest, const char *str, unsigned n) +{ + char *both; + unsigned existing_length; + assert(dest != NULL && *dest != NULL); + + existing_length = strlen(*dest); + both = linear_realloc(parent, *dest, existing_length + n + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, n); + both[existing_length + n] = '\0'; + + *dest = both; + return true; +} + +bool +linear_strcat(void *parent, char **dest, const char *str) +{ + return linear_cat(parent, dest, str, strlen(str)); +} diff --git a/src/mesa/util/ralloc.h b/src/mesa/util/ralloc.h new file mode 100644 index 00000000..857ca5f7 --- /dev/null +++ b/src/mesa/util/ralloc.h @@ -0,0 +1,604 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ralloc.h
+ *
+ * ralloc: a recursive memory allocator
+ *
+ * The ralloc memory allocator creates a hierarchy of allocated
+ * objects. Every allocation is in reference to some parent, and
+ * every allocated object can in turn be used as the parent of a
+ * subsequent allocation. This allows for extremely convenient
+ * discarding of an entire tree/sub-tree of allocations by calling
+ * ralloc_free on any particular object to free it and all of its
+ * children.
+ *
+ * The conceptual working of ralloc was directly inspired by Andrew
+ * Tridgell's talloc, but ralloc is an independent implementation
+ * released under the MIT license and tuned for Mesa.
+ *
+ * talloc is more sophisticated than ralloc in that it includes reference
+ * counting and useful debugging features.  However, it is released under
+ * a non-permissive open source license.
+ */
+
+#ifndef RALLOC_H
+#define RALLOC_H
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdbool.h>
+
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \def ralloc(ctx, type)
+ * Allocate a new object chained off of the given context.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) ralloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define ralloc(ctx, type)  ((type *) ralloc_size(ctx, sizeof(type)))
+
+/**
+ * \def rzalloc(ctx, type)
+ * Allocate a new object out of the given context and initialize it to zero.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) rzalloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define rzalloc(ctx, type) ((type *) rzalloc_size(ctx, sizeof(type)))
+
+/**
+ * Allocate a new ralloc context.
+ *
+ * While any ralloc'd pointer can be used as a context, sometimes it is useful
+ * to simply allocate a context with no associated memory.
+ *
+ * It is equivalent to:
+ * \code
+ * ((type *) ralloc_size(ctx, 0))
+ * \endcode
+ */
+void *ralloc_context(const void *ctx);
+
+/**
+ * Allocate memory chained off of the given context.
+ *
+ * This is the core allocation routine which is used by all others.  It
+ * simply allocates storage for \p size bytes and returns the pointer,
+ * similar to \c malloc.
+ */
+void *ralloc_size(const void *ctx, size_t size) MALLOCLIKE;
+
+/**
+ * Allocate zero-initialized memory chained off of the given context.
+ *
+ * This is similar to \c calloc with a size of 1.
+ */
+void *rzalloc_size(const void *ctx, size_t size) MALLOCLIKE;
+
+/**
+ * Resize a piece of ralloc-managed memory, preserving data.
+ *
+ * Similar to \c realloc.  Unlike C89, passing 0 for \p size does not free the
+ * memory.  Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0).  This is different from talloc.
+ *
+ * \param ctx   The context to use for new allocation.  If \p ptr != NULL,
+ *              it must be the same as ralloc_parent(\p ptr).
+ * \param ptr   Pointer to the memory to be resized.  May be NULL.
+ * \param size  The amount of memory to allocate, in bytes.
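+ *
+ * A minimal usage sketch (hypothetical growth of an int buffer):
+ * \code
+ * int *buf = (int *) ralloc_size(ctx, 4 * sizeof(int));
+ * buf = (int *) reralloc_size(ctx, buf, 8 * sizeof(int));  // old data kept
+ * \endcode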
+ */ +void *reralloc_size(const void *ctx, void *ptr, size_t size); + +/** + * Resize a ralloc-managed array, preserving data and initializing any newly + * allocated data to zero. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the memory to be resized. May be NULL. + * \param old_size The amount of memory in the previous allocation, in bytes. + * \param new_size The amount of memory to allocate, in bytes. + */ +void *rerzalloc_size(const void *ctx, void *ptr, + size_t old_size, size_t new_size); + +/// \defgroup array Array Allocators @{ + +/** + * \def ralloc_array(ctx, type, count) + * Allocate an array of objects chained off the given context. + * + * Similar to \c calloc, but does not initialize the memory to zero. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * This is equivalent to: + * \code + * ((type *) ralloc_array_size(ctx, sizeof(type), count) + * \endcode + */ +#define ralloc_array(ctx, type, count) \ + ((type *) ralloc_array_size(ctx, sizeof(type), count)) + +/** + * \def rzalloc_array(ctx, type, count) + * Allocate a zero-initialized array chained off the given context. + * + * Similar to \c calloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * This is equivalent to: + * \code + * ((type *) rzalloc_array_size(ctx, sizeof(type), count) + * \endcode + */ +#define rzalloc_array(ctx, type, count) \ + ((type *) rzalloc_array_size(ctx, sizeof(type), count)) + +/** + * \def reralloc(ctx, ptr, type, count) + * Resize a ralloc-managed array, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param type The element type. + * \param count The number of elements to allocate. + */ +#define reralloc(ctx, ptr, type, count) \ + ((type *) reralloc_array_size(ctx, ptr, sizeof(type), count)) + +/** + * \def rerzalloc(ctx, ptr, type, count) + * Resize a ralloc-managed array, preserving data and initializing any newly + * allocated data to zero. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. 
May be NULL. + * \param type The element type. + * \param old_count The number of elements in the previous allocation. + * \param new_count The number of elements to allocate. + */ +#define rerzalloc(ctx, ptr, type, old_count, new_count) \ + ((type *) rerzalloc_array_size(ctx, ptr, sizeof(type), old_count, new_count)) + +/** + * Allocate memory for an array chained off the given context. + * + * Similar to \c calloc, but does not initialize the memory to zero. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \p size and \p count. This is necessary for security. + */ +void *ralloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE; + +/** + * Allocate a zero-initialized array chained off the given context. + * + * Similar to \c calloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \p size and \p count. This is necessary for security. + */ +void *rzalloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE; + +/** + * Resize a ralloc-managed array, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param size The size of an individual element. + * \param count The number of elements to allocate. + * + * \return True unless allocation failed. + */ +void *reralloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned count); + +/** + * Resize a ralloc-managed array, preserving data and initializing any newly + * allocated data to zero. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param size The size of an individual element. + * \param old_count The number of elements in the previous allocation. + * \param new_count The number of elements to allocate. + * + * \return True unless allocation failed. + */ +void *rerzalloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned old_count, unsigned new_count); +/// @} + +/** + * Free a piece of ralloc-managed memory. + * + * This will also free the memory of any children allocated this context. + */ +void ralloc_free(void *ptr); + +/** + * "Steal" memory from one context, changing it to another. + * + * This changes \p ptr's context to \p new_ctx. This is quite useful if + * memory is allocated out of a temporary context. + */ +void ralloc_steal(const void *new_ctx, void *ptr); + +/** + * Reparent all children from one context to another. + * + * This effectively calls ralloc_steal(new_ctx, child) for all children of \p old_ctx. 
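+ *
+ * For example (a sketch; \c persistent_ctx stands for any existing context):
+ * \code
+ * void *tmp = ralloc_context(NULL);
+ * char *s = ralloc_strdup(tmp, "hello");
+ * ralloc_adopt(persistent_ctx, tmp);  // s now belongs to persistent_ctx
+ * ralloc_free(tmp);                   // frees only the emptied tmp context
+ * \endcode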
+ */ +void ralloc_adopt(const void *new_ctx, void *old_ctx); + +/** + * Return the given pointer's ralloc context. + */ +void *ralloc_parent(const void *ptr); + +/** + * Set a callback to occur just before an object is freed. + */ +void ralloc_set_destructor(const void *ptr, void(*destructor)(void *)); + +/// \defgroup array String Functions @{ +/** + * Duplicate a string, allocating the memory from the given context. + */ +char *ralloc_strdup(const void *ctx, const char *str) MALLOCLIKE; + +/** + * Duplicate a string, allocating the memory from the given context. + * + * Like \c strndup, at most \p n characters are copied. If \p str is longer + * than \p n characters, \p n are copied, and a termining \c '\0' byte is added. + */ +char *ralloc_strndup(const void *ctx, const char *str, size_t n) MALLOCLIKE; + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends \p str to \p *dest, similar to \c strcat, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated. + * + * \return True unless allocation failed. + */ +bool ralloc_strcat(char **dest, const char *str); + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends at most \p n bytes of \p str to \p *dest, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated; \p str does not need to be null + * terminated if it is longer than \p n. + * + * \return True unless allocation failed. + */ +bool ralloc_strncat(char **dest, const char *str, size_t n); + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends \p n bytes of \p str to \p *dest, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated. + * + * This function differs from ralloc_strcat() and ralloc_strncat() in that it + * does not do any strlen() calls which can become costly on large strings. + * + * \return True unless allocation failed. + */ +bool +ralloc_str_append(char **dest, const char *str, + size_t existing_length, size_t str_size); + +/** + * Print to a string. + * + * This is analogous to \c sprintf, but allocates enough space (using \p ctx + * as the context) for the resulting string. + * + * \return The newly allocated string. + */ +char *ralloc_asprintf (const void *ctx, const char *fmt, ...) PRINTFLIKE(2, 3) MALLOCLIKE; + +/** + * Print to a string, given a va_list. + * + * This is analogous to \c vsprintf, but allocates enough space (using \p ctx + * as the context) for the resulting string. + * + * \return The newly allocated string. + */ +char *ralloc_vasprintf(const void *ctx, const char *fmt, va_list args) MALLOCLIKE; + +/** + * Rewrite the tail of an existing string, starting at a given index. + * + * Overwrites the contents of *str starting at \p start with newly formatted + * text, including a new null-terminator. Allocates more memory as necessary. + * + * This can be used to append formatted text when the length of the existing + * string is already known, saving a strlen() call. + * + * \sa ralloc_asprintf_append + * + * \param str The string to be updated. + * \param start The index to start appending new data at. 
+ * \param fmt    A printf-style formatting string
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_rewrite_tail(char **str, size_t *start,
+                                  const char *fmt, ...)
+                                  PRINTFLIKE(3, 4);
+
+/**
+ * Rewrite the tail of an existing string, starting at a given index.
+ *
+ * Overwrites the contents of *str starting at \p start with newly formatted
+ * text, including a new null-terminator.  Allocates more memory as necessary.
+ *
+ * This can be used to append formatted text when the length of the existing
+ * string is already known, saving a strlen() call.
+ *
+ * \sa ralloc_vasprintf_append
+ *
+ * \param str    The string to be updated.
+ * \param start  The index to start appending new data at.
+ * \param fmt    A printf-style formatting string
+ * \param args   A va_list containing the data to be formatted
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
+                                   va_list args);
+
+/**
+ * Append formatted text to the supplied string.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_asprintf_rewrite_tail(str, strlen(*str), fmt, ...)
+ * \endcode
+ *
+ * \sa ralloc_asprintf
+ * \sa ralloc_asprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_append (char **str, const char *fmt, ...)
+                             PRINTFLIKE(2, 3);
+
+/**
+ * Append formatted text to the supplied string, given a va_list.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_vasprintf_rewrite_tail(str, strlen(*str), fmt, args)
+ * \endcode
+ *
+ * \sa ralloc_vasprintf
+ * \sa ralloc_vasprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_append(char **str, const char *fmt, va_list args);
+/// @}
+
+/**
+ * Declare C++ new and delete operators which use ralloc.
+ *
+ * Placing this macro in the body of a class makes it possible to do:
+ *
+ * TYPE *var = new(mem_ctx) TYPE(...);
+ * delete var;
+ *
+ * which is more idiomatic in C++ than calling ralloc.
+ */
+#define DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(TYPE, ALLOC_FUNC)          \
+private:                                                                \
+   static void _ralloc_destructor(void *p)                              \
+   {                                                                    \
+      reinterpret_cast<TYPE *>(p)->TYPE::~TYPE();                       \
+   }                                                                    \
+public:                                                                 \
+   static void* operator new(size_t size, void *mem_ctx)                \
+   {                                                                    \
+      void *p = ALLOC_FUNC(mem_ctx, size);                              \
+      assert(p != NULL);                                                \
+      if (!HAS_TRIVIAL_DESTRUCTOR(TYPE))                                \
+         ralloc_set_destructor(p, _ralloc_destructor);                  \
+      return p;                                                         \
+   }                                                                    \
+                                                                        \
+   static void operator delete(void *p)                                 \
+   {                                                                    \
+      /* The object's destructor is guaranteed to have already been     \
+       * called by the delete operator at this point -- Make sure it's  \
+       * not called again.
\ + */ \ + if (!HAS_TRIVIAL_DESTRUCTOR(TYPE)) \ + ralloc_set_destructor(p, NULL); \ + ralloc_free(p); \ + } + +#define DECLARE_RALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, ralloc_size) + +#define DECLARE_RZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, rzalloc_size) + +#define DECLARE_LINEAR_ALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_alloc_child) + +#define DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_zalloc_child) + + +/** + * Do a fast allocation from the linear buffer, also known as the child node + * from the allocator's point of view. It can't be freed directly. You have + * to free the parent or the ralloc parent. + * + * \param parent parent node of the linear allocator + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_child(void *parent, unsigned size); + +/** + * Allocate a parent node that will hold linear buffers. The returned + * allocation is actually the first child node, but it's also the handle + * of the parent node. Use it for all child node allocations. + * + * \param ralloc_ctx ralloc context, must not be NULL + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Same as linear_alloc_child, but also clears memory. + */ +void *linear_zalloc_child(void *parent, unsigned size); + +/** + * Same as linear_alloc_parent, but also clears memory. + */ +void *linear_zalloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Free the linear parent node. This will free all child nodes too. + * Freeing the ralloc parent will also free this. + */ +void linear_free_parent(void *ptr); + +/** + * Same as ralloc_steal, but steals the linear parent node. + */ +void ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr); + +/** + * Return the ralloc parent of the linear parent node. + */ +void *ralloc_parent_of_linear_parent(void *ptr); + +/** + * Same as realloc except that the linear allocator doesn't free child nodes, + * so it's reduced to memory duplication. It's used in places where + * reallocation is required. Don't use it often. It's much slower than + * realloc. + */ +void *linear_realloc(void *parent, void *old, unsigned new_size); + +/* The functions below have the same semantics as their ralloc counterparts, + * except that they always allocate a linear child node. 
+ */
+char *linear_strdup(void *parent, const char *str);
+char *linear_asprintf(void *parent, const char *fmt, ...);
+char *linear_vasprintf(void *parent, const char *fmt, va_list args);
+bool linear_asprintf_append(void *parent, char **str, const char *fmt, ...);
+bool linear_vasprintf_append(void *parent, char **str, const char *fmt,
+                             va_list args);
+bool linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start,
+                                  const char *fmt, ...);
+bool linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start,
+                                   const char *fmt, va_list args);
+bool linear_strcat(void *parent, char **dest, const char *str);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif
diff --git a/src/mesa/util/simple_mtx.h b/src/mesa/util/simple_mtx.h
new file mode 100644
index 00000000..2be576ec
--- /dev/null
+++ b/src/mesa/util/simple_mtx.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2015 Intel
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _SIMPLE_MTX_H
+#define _SIMPLE_MTX_H
+
+#include "../util/futex.h"
+
+#include "../c11/threads.h"
+
+#if defined(__GNUC__) && defined(HAVE_LINUX_FUTEX_H)
+
+/* simple_mtx_t - Fast, simple mutex
+ *
+ * While modern pthread mutexes are very fast (implemented using futex), they
+ * still incur a call to an external DSO and the overhead of the generality
+ * and features of pthread mutexes.  Most mutexes in mesa only need
+ * lock/unlock, and the idea here is that we can inline the atomic operation
+ * and make the fast case just two instructions.  Mutexes are subtle and
+ * finicky to implement, so we carefully copy the implementation from Ulrich
+ * Drepper's well-written and well-reviewed paper:
+ *
+ *   "Futexes Are Tricky"
+ *   http://www.akkadia.org/drepper/futex.pdf
+ *
+ * We implement "mutex3", which gives us a mutex that has no syscalls on
+ * uncontended lock or unlock.  Further, the uncontended case boils down to a
+ * locked cmpxchg and an untaken branch, and the uncontended unlock is just a
+ * locked decr and an untaken branch.  We use __builtin_expect() to indicate
+ * that contention is unlikely so that gcc will put the contention code out
+ * of the main code flow.
+ *
+ * A fast mutex only supports lock/unlock, can't be recursive or used with
+ * condition variables.
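+ *
+ * The val field encodes three states: 0 = unlocked, 1 = locked with no
+ * waiters, and 2 = locked with possible waiters.  Only transitions that
+ * involve state 2 ever reach the futex_wait()/futex_wake() calls below.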
+ */ + +typedef struct { + uint32_t val; +} simple_mtx_t; + +#define _SIMPLE_MTX_INITIALIZER_NP { 0 } + +#define _SIMPLE_MTX_INVALID_VALUE 0xd0d0d0d0 + +static inline void +simple_mtx_init(simple_mtx_t *mtx, ASSERTED int type) +{ + assert(type == mtx_plain); + + mtx->val = 0; +} + +static inline void +simple_mtx_destroy(ASSERTED simple_mtx_t *mtx) +{ +#ifndef NDEBUG + mtx->val = _SIMPLE_MTX_INVALID_VALUE; +#endif +} + +static inline void +simple_mtx_lock(simple_mtx_t *mtx) +{ + uint32_t c; + + c = __sync_val_compare_and_swap(&mtx->val, 0, 1); + + assert(c != _SIMPLE_MTX_INVALID_VALUE); + + if (__builtin_expect(c != 0, 0)) { + if (c != 2) + c = __sync_lock_test_and_set(&mtx->val, 2); + while (c != 0) { + futex_wait(&mtx->val, 2, NULL); + c = __sync_lock_test_and_set(&mtx->val, 2); + } + } +} + +static inline void +simple_mtx_unlock(simple_mtx_t *mtx) +{ + uint32_t c; + + c = __sync_fetch_and_sub(&mtx->val, 1); + + assert(c != _SIMPLE_MTX_INVALID_VALUE); + + if (__builtin_expect(c != 1, 0)) { + mtx->val = 0; + futex_wake(&mtx->val, 1); + } +} + +#else + +typedef mtx_t simple_mtx_t; + +#define _SIMPLE_MTX_INITIALIZER_NP _MTX_INITIALIZER_NP + +static inline void +simple_mtx_init(simple_mtx_t *mtx, int type) +{ + mtx_init(mtx, type); +} + +static inline void +simple_mtx_destroy(simple_mtx_t *mtx) +{ + mtx_destroy(mtx); +} + +static inline void +simple_mtx_lock(simple_mtx_t *mtx) +{ + mtx_lock(mtx); +} + +static inline void +simple_mtx_unlock(simple_mtx_t *mtx) +{ + mtx_unlock(mtx); +} + +#endif + +#endif diff --git a/src/meson.build b/src/meson.build new file mode 100644 index 00000000..d5bb28ba --- /dev/null +++ b/src/meson.build @@ -0,0 +1,83 @@ +# Copyright © 2019 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
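+
+# Build overview: each GLSL shader below is compiled to a SPIR-V header with
+# glslangValidator; the generated headers are then built, together with the
+# Mesa utility sources and ImGui, into the MangoHud shared library, and the
+# Vulkan implicit-layer JSON manifest and helper scripts are installed too.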
+ +glslang = find_program('glslangValidator') + +overlay_shaders = [ + 'overlay.frag', + 'overlay.vert', +] +overlay_spv = [] +foreach s : ['overlay.frag', 'overlay.vert'] + overlay_spv += custom_target( + s + '.spv.h', input : s, output : s + '.spv.h', + command : [glslang, '-V', '-x', '-o', '@OUTPUT@', '@INPUT@']) +endforeach + +vklayer_files = files( + 'overlay.cpp', + 'overlay_params.c', + # 'font_unispace.c', +) + +vklayer_mesa_overlay = shared_library( + 'MangoHud', + util_files, + vk_enum_to_str, + vklayer_files, + overlay_spv, + vk_layer_table_helpers, + c_args : [ + pre_args, + c_vis_args, + no_override_init_args, + vulkan_wsi_args + ], + cpp_args : [ + pre_args, + cpp_vis_args, + vulkan_wsi_args + ], + dependencies : [ + vulkan_wsi_deps, + libimgui_core_dep, + dep_dl, + dep_pthread], + include_directories : inc_common, + link_args : cc.get_supported_link_arguments(['-Wl,-Bsymbolic-functions', '-Wl,-z,relro']), + install : true +) + +install_data( + files('mangohud.json'), + install_dir : join_paths(get_option('datadir'), 'vulkan', 'implicit_layer.d'), +) + +install_data( + files('setup_mangohud.sh'), + install_dir: get_option('bindir'), +) + +configure_file( + input : files('mesa-overlay-control.py'), + output : '@PLAINNAME@', + configuration : configuration_data(), # only copy the file + install_dir: get_option('bindir'), +) diff --git a/src/overlay.cpp b/src/overlay.cpp new file mode 100644 index 00000000..6b24ec63 --- /dev/null +++ b/src/overlay.cpp @@ -0,0 +1,2837 @@ +/* + * Copyright © 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include <vulkan/vulkan.h>
+#include <vulkan/vk_layer.h>
+
+// #include "git_sha1.h"
+
+#include "imgui.h"
+
+#include "overlay_params.h"
+
+// #include "util/debug.h"
+#include "mesa/util/hash_table.h"
+#include "mesa/util/list.h"
+#include "mesa/util/ralloc.h"
+#include "mesa/util/os_time.h"
+#include "mesa/util/os_socket.h"
+#include "mesa/util/simple_mtx.h"
+
+#include "vk_enum_to_str.h"
+#include "../include/vulkan/vk_util.h"
+
+#include "cpu_gpu.h"
+#include "logging.h"
+#include "keybinds.h"
+
+bool open = false, displayHud = true;
+string gpuString;
+float offset_x, offset_y, hudSpacing;
+int hudFirstRow, hudSecondRow;
+const char* offset_x_env = std::getenv("X_OFFSET");
+const char* offset_y_env = std::getenv("Y_OFFSET");
+string engineName, engineVersion;
+ImFont* font1;
+
+/* Mapped from VkInstance/VkPhysicalDevice */
+struct instance_data {
+   struct vk_instance_dispatch_table vtable;
+   VkInstance instance;
+
+   struct overlay_params params;
+   bool pipeline_statistics_enabled;
+
+   bool first_line_printed;
+
+   int control_client;
+
+   /* Dumping of frame stats to a file has been enabled. */
+   bool capture_enabled;
+
+   /* Dumping of frame stats to a file has been enabled and started. */
+   bool capture_started;
+};
+
+struct frame_stat {
+   uint64_t stats[OVERLAY_PARAM_ENABLED_MAX];
+};
+
+/* Mapped from VkDevice */
+struct queue_data;
+struct device_data {
+   struct instance_data *instance;
+
+   PFN_vkSetDeviceLoaderData set_device_loader_data;
+
+   struct vk_device_dispatch_table vtable;
+   VkPhysicalDevice physical_device;
+   VkDevice device;
+
+   VkPhysicalDeviceProperties properties;
+
+   struct queue_data *graphic_queue;
+
+   struct queue_data **queues;
+   uint32_t n_queues;
+
+   /* For a single frame */
+   struct frame_stat frame_stats;
+};
+
+/* Mapped from VkCommandBuffer */
+struct command_buffer_data {
+   struct device_data *device;
+
+   VkCommandBufferLevel level;
+
+   VkCommandBuffer cmd_buffer;
+   VkQueryPool pipeline_query_pool;
+   VkQueryPool timestamp_query_pool;
+   uint32_t query_index;
+
+   struct frame_stat stats;
+
+   struct list_head link; /* link into queue_data::running_command_buffer */
+};
+
+/* Mapped from VkQueue */
+struct queue_data {
+   struct device_data *device;
+
+   VkQueue queue;
+   VkQueueFlags flags;
+   uint32_t family_index;
+   uint64_t timestamp_mask;
+
+   VkFence queries_fence;
+
+   struct list_head running_command_buffer;
+};
+
+struct overlay_draw {
+   struct list_head link;
+
+   VkCommandBuffer command_buffer;
+
+   VkSemaphore semaphore;
+   VkFence fence;
+
+   VkBuffer vertex_buffer;
+   VkDeviceMemory vertex_buffer_mem;
+   VkDeviceSize vertex_buffer_size;
+
+   VkBuffer index_buffer;
+   VkDeviceMemory index_buffer_mem;
+   VkDeviceSize index_buffer_size;
+};
+
+/* Mapped from VkSwapchainKHR */
+struct swapchain_data {
+   struct device_data *device;
+
+   VkSwapchainKHR swapchain;
+   unsigned width, height;
+   VkFormat format;
+
+   uint32_t n_images;
+   VkImage *images;
+   VkImageView *image_views;
+   VkFramebuffer *framebuffers;
+
+   VkRenderPass render_pass;
+
+   VkDescriptorPool descriptor_pool;
+   VkDescriptorSetLayout descriptor_layout;
+   VkDescriptorSet descriptor_set;
+
+   VkSampler font_sampler;
+
+   VkPipelineLayout pipeline_layout;
+   VkPipeline pipeline;
+
+   VkCommandPool command_pool;
+
+   struct list_head draws; /* List of struct overlay_draw */
+
+   bool font_uploaded;
+   VkImage font_image;
+   VkImageView font_image_view;
+   VkDeviceMemory font_mem;
+   VkBuffer upload_font_buffer;
+   VkDeviceMemory upload_font_buffer_mem;
+
+   /**/
+   ImGuiContext* imgui_context;
+   ImVec2 window_size;
+
+
/**/ + uint64_t n_frames; + uint64_t last_present_time; + + unsigned n_frames_since_update; + uint64_t last_fps_update; + double fps; + double frametime; + double frametimeDisplay; + const char* cpuString; + const char* gpuString; + + enum overlay_param_enabled stat_selector; + double time_dividor; + struct frame_stat stats_min, stats_max; + struct frame_stat frames_stats[200]; + + /* Over a single frame */ + struct frame_stat frame_stats; + + /* Over fps_sampling_period */ + struct frame_stat accumulated_stats; +}; + +static const VkQueryPipelineStatisticFlags overlay_query_flags = + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | + VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT; +#define OVERLAY_QUERY_COUNT (11) + +static struct hash_table_u64 *vk_object_to_data = NULL; +static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP; + +thread_local ImGuiContext* __MesaImGui; + +static inline void ensure_vk_object_map(void) +{ + if (!vk_object_to_data) + vk_object_to_data = _mesa_hash_table_u64_create(NULL); +} + +#define HKEY(obj) ((uint64_t)(obj)) +#define FIND(type, obj) ((type *)find_object_data(HKEY(obj))) + +static void *find_object_data(uint64_t obj) +{ + simple_mtx_lock(&vk_object_to_data_mutex); + ensure_vk_object_map(); + void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj); + simple_mtx_unlock(&vk_object_to_data_mutex); + return data; +} + +static void map_object(uint64_t obj, void *data) +{ + simple_mtx_lock(&vk_object_to_data_mutex); + ensure_vk_object_map(); + _mesa_hash_table_u64_insert(vk_object_to_data, obj, data); + simple_mtx_unlock(&vk_object_to_data_mutex); +} + +static void unmap_object(uint64_t obj) +{ + simple_mtx_lock(&vk_object_to_data_mutex); + _mesa_hash_table_u64_remove(vk_object_to_data, obj); + simple_mtx_unlock(&vk_object_to_data_mutex); +} + +/**/ + +#define VK_CHECK(expr) \ + do { \ + VkResult __result = (expr); \ + if (__result != VK_SUCCESS) { \ + fprintf(stderr, "'%s' line %i failed with %s\n", \ + #expr, __LINE__, vk_Result_to_str(__result)); \ + } \ + } while (0) + +/**/ + +static VkLayerInstanceCreateInfo *get_instance_chain_info(const VkInstanceCreateInfo *pCreateInfo, + VkLayerFunction func) +{ + vk_foreach_struct(item, pCreateInfo->pNext) { + if (item->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && + ((VkLayerInstanceCreateInfo *) item)->function == func) + return (VkLayerInstanceCreateInfo *) item; + } + unreachable("instance chain info not found"); + return NULL; +} + +static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo, + VkLayerFunction func) +{ + vk_foreach_struct(item, pCreateInfo->pNext) { + if (item->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && + ((VkLayerDeviceCreateInfo *) item)->function == func) + return (VkLayerDeviceCreateInfo *)item; + } + unreachable("device chain info not found"); + return NULL; +} + +static struct VkBaseOutStructure * +clone_chain(const 
struct VkBaseInStructure *chain)
+{
+   struct VkBaseOutStructure *head = NULL, *tail = NULL;
+
+   vk_foreach_struct_const(item, chain) {
+      size_t item_size = vk_structure_type_size(item);
+      struct VkBaseOutStructure *new_item =
+         (struct VkBaseOutStructure *)malloc(item_size);
+
+      memcpy(new_item, item, item_size);
+
+      if (!head)
+         head = new_item;
+      if (tail)
+         tail->pNext = new_item;
+      tail = new_item;
+   }
+
+   return head;
+}
+
+static void
+free_chain(struct VkBaseOutStructure *chain)
+{
+   while (chain) {
+      void *node = chain;
+      chain = chain->pNext;
+      free(node);
+   }
+}
+
+/**/
+
+static struct instance_data *new_instance_data(VkInstance instance)
+{
+   struct instance_data *data = rzalloc(NULL, struct instance_data);
+   data->instance = instance;
+   data->control_client = -1;
+   map_object(HKEY(data->instance), data);
+   return data;
+}
+
+static void destroy_instance_data(struct instance_data *data)
+{
+   if (data->params.output_file)
+      fclose(data->params.output_file);
+   if (data->params.control >= 0)
+      os_socket_close(data->params.control);
+   unmap_object(HKEY(data->instance));
+   ralloc_free(data);
+}
+
+static void instance_data_map_physical_devices(struct instance_data *instance_data,
+                                               bool map)
+{
+   uint32_t physicalDeviceCount = 0;
+   instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance,
+                                                  &physicalDeviceCount,
+                                                  NULL);
+
+   VkPhysicalDevice *physicalDevices = (VkPhysicalDevice *) malloc(sizeof(VkPhysicalDevice) * physicalDeviceCount);
+   instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance,
+                                                  &physicalDeviceCount,
+                                                  physicalDevices);
+
+   for (uint32_t i = 0; i < physicalDeviceCount; i++) {
+      if (map)
+         map_object(HKEY(physicalDevices[i]), instance_data);
+      else
+         unmap_object(HKEY(physicalDevices[i]));
+   }
+
+   free(physicalDevices);
+}
+
+/**/
+static struct device_data *new_device_data(VkDevice device, struct instance_data *instance)
+{
+   struct device_data *data = rzalloc(NULL, struct device_data);
+   data->instance = instance;
+   data->device = device;
+   map_object(HKEY(data->device), data);
+   return data;
+}
+
+static struct queue_data *new_queue_data(VkQueue queue,
+                                         const VkQueueFamilyProperties *family_props,
+                                         uint32_t family_index,
+                                         struct device_data *device_data)
+{
+   struct queue_data *data = rzalloc(device_data, struct queue_data);
+   data->device = device_data;
+   data->queue = queue;
+   data->flags = family_props->queueFlags;
+   data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1;
+   data->family_index = family_index;
+   list_inithead(&data->running_command_buffer);
+   map_object(HKEY(data->queue), data);
+
+   /* Fence synchronizing access to queries on that queue.
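+    * It is created in the signaled state (VK_FENCE_CREATE_SIGNALED_BIT
+    * below), so the first wait on it completes immediately.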
*/ + VkFenceCreateInfo fence_info = {}; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; + VK_CHECK(device_data->vtable.CreateFence(device_data->device, + &fence_info, + NULL, + &data->queries_fence)); + + if (data->flags & VK_QUEUE_GRAPHICS_BIT) + device_data->graphic_queue = data; + + return data; +} + +static void destroy_queue(struct queue_data *data) +{ + struct device_data *device_data = data->device; + device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL); + unmap_object(HKEY(data->queue)); + ralloc_free(data); +} + +static void device_map_queues(struct device_data *data, + const VkDeviceCreateInfo *pCreateInfo) +{ + for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) + data->n_queues += pCreateInfo->pQueueCreateInfos[i].queueCount; + data->queues = ralloc_array(data, struct queue_data *, data->n_queues); + + struct instance_data *instance_data = data->instance; + uint32_t n_family_props; + instance_data->vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device, + &n_family_props, + NULL); + VkQueueFamilyProperties *family_props = + (VkQueueFamilyProperties *)malloc(sizeof(VkQueueFamilyProperties) * n_family_props); + instance_data->vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device, + &n_family_props, + family_props); + + uint32_t queue_index = 0; + for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { + for (uint32_t j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) { + VkQueue queue; + data->vtable.GetDeviceQueue(data->device, + pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, + j, &queue); + + VK_CHECK(data->set_device_loader_data(data->device, queue)); + + data->queues[queue_index++] = + new_queue_data(queue, &family_props[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex], + pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, data); + } + } + + free(family_props); +} + +static void device_unmap_queues(struct device_data *data) +{ + for (uint32_t i = 0; i < data->n_queues; i++) + destroy_queue(data->queues[i]); +} + +static void destroy_device_data(struct device_data *data) +{ + unmap_object(HKEY(data->device)); + ralloc_free(data); +} + +/**/ +static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer, + VkCommandBufferLevel level, + VkQueryPool pipeline_query_pool, + VkQueryPool timestamp_query_pool, + uint32_t query_index, + struct device_data *device_data) +{ + struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data); + data->device = device_data; + data->cmd_buffer = cmd_buffer; + data->level = level; + data->pipeline_query_pool = pipeline_query_pool; + data->timestamp_query_pool = timestamp_query_pool; + data->query_index = query_index; + list_inithead(&data->link); + map_object(HKEY(data->cmd_buffer), data); + return data; +} + +static void destroy_command_buffer_data(struct command_buffer_data *data) +{ + unmap_object(HKEY(data->cmd_buffer)); + list_delinit(&data->link); + ralloc_free(data); +} + +/**/ +static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain, + struct device_data *device_data) +{ + struct instance_data *instance_data = device_data->instance; + struct swapchain_data *data = rzalloc(NULL, struct swapchain_data); + data->device = device_data; + data->swapchain = swapchain; + data->window_size = ImVec2(instance_data->params.width, instance_data->params.height); + list_inithead(&data->draws); + map_object(HKEY(data->swapchain), data); + 
return data;
+}
+
+static void destroy_swapchain_data(struct swapchain_data *data)
+{
+   unmap_object(HKEY(data->swapchain));
+   ralloc_free(data);
+}
+
+struct overlay_draw *get_overlay_draw(struct swapchain_data *data)
+{
+   struct device_data *device_data = data->device;
+   struct overlay_draw *draw = list_is_empty(&data->draws) ?
+      NULL : list_first_entry(&data->draws, struct overlay_draw, link);
+
+   VkSemaphoreCreateInfo sem_info = {};
+   sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+
+   if (draw && device_data->vtable.GetFenceStatus(device_data->device, draw->fence) == VK_SUCCESS) {
+      list_del(&draw->link);
+      VK_CHECK(device_data->vtable.ResetFences(device_data->device,
+                                               1, &draw->fence));
+      list_addtail(&draw->link, &data->draws);
+      return draw;
+   }
+
+   draw = rzalloc(data, struct overlay_draw);
+
+   VkCommandBufferAllocateInfo cmd_buffer_info = {};
+   cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+   cmd_buffer_info.commandPool = data->command_pool;
+   cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+   cmd_buffer_info.commandBufferCount = 1;
+   VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device,
+                                                       &cmd_buffer_info,
+                                                       &draw->command_buffer));
+   VK_CHECK(device_data->set_device_loader_data(device_data->device,
+                                                draw->command_buffer));
+
+   VkFenceCreateInfo fence_info = {};
+   fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+   VK_CHECK(device_data->vtable.CreateFence(device_data->device,
+                                            &fence_info,
+                                            NULL,
+                                            &draw->fence));
+
+   VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info,
+                                                NULL, &draw->semaphore));
+
+   list_addtail(&draw->link, &data->draws);
+
+   return draw;
+}
+
+static const char *param_unit(enum overlay_param_enabled param)
+{
+   switch (param) {
+   case OVERLAY_PARAM_ENABLED_frame_timing:
+   case OVERLAY_PARAM_ENABLED_acquire_timing:
+   case OVERLAY_PARAM_ENABLED_present_timing:
+      return "(us)";
+   case OVERLAY_PARAM_ENABLED_gpu_timing:
+      return "(ns)";
+   default:
+      return "";
+   }
+}
+
+static void parse_command(struct instance_data *instance_data,
+                          const char *cmd, unsigned cmdlen,
+                          const char *param, unsigned paramlen)
+{
+   if (!strncmp(cmd, "capture", cmdlen)) {
+      int value = atoi(param);
+      bool enabled = value > 0;
+
+      if (enabled) {
+         instance_data->capture_enabled = true;
+      } else {
+         instance_data->capture_enabled = false;
+         instance_data->capture_started = false;
+      }
+   }
+}
+
+#define BUFSIZE 4096
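Editorial aside: the doc comment and parser that follow define a small text protocol spoken over the control socket. As an illustration, here is a minimal hypothetical client; the socket path and the use of a Unix-domain stream socket are assumptions (the layer listens on whatever socket its `control` parameter was configured with), but the `:cmd=param;` framing matches `process_char()` and `control_send()` below:

```cpp
// Hypothetical control client; only the message framing is taken from the
// layer code, the socket path is an assumption for illustration.
#include <cstdio>
#include <cstring>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int main()
{
   const char *path = "/tmp/mangohud.ctl"; /* assumed path, not from the patch */
   int fd = socket(AF_UNIX, SOCK_STREAM, 0);
   if (fd < 0) { perror("socket"); return 1; }

   sockaddr_un addr = {};
   addr.sun_family = AF_UNIX;
   strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
   if (connect(fd, (sockaddr *)&addr, sizeof(addr)) < 0) {
      perror("connect");
      return 1;
   }

   /* ':' starts a command, '=' introduces the optional parameter and ';'
    * terminates the message, exactly as process_char() expects. */
   const char msg[] = ":capture=1;";
   write(fd, msg, sizeof(msg) - 1);

   /* Print whatever the layer sends back. */
   char buf[256];
   ssize_t n = read(fd, buf, sizeof(buf) - 1);
   if (n > 0) {
      buf[n] = '\0';
      printf("%s\n", buf);
   }

   close(fd);
   return 0;
}
```

Run against a live layer, the read should return the greeting triple (`MesaOverlayControlVersion`, `DeviceName`, `MesaVersion`) sent by `control_send_connection_string()` further down.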
+
+/**
+ * This function will process commands through the control file.
+ *
+ * A command starts with a colon, followed by the command name, optionally
+ * followed by '=' and a parameter. It has to end with a semi-colon. A full
+ * command + parameter looks like:
+ *
+ * :cmd=param;
+ */
+static void process_char(struct instance_data *instance_data, char c)
+{
+   static char cmd[BUFSIZE];
+   static char param[BUFSIZE];
+
+   static unsigned cmdpos = 0;
+   static unsigned parampos = 0;
+   static bool reading_cmd = false;
+   static bool reading_param = false;
+
+   switch (c) {
+   case ':':
+      cmdpos = 0;
+      parampos = 0;
+      reading_cmd = true;
+      reading_param = false;
+      break;
+   case ';':
+      if (!reading_cmd)
+         break;
+      cmd[cmdpos++] = '\0';
+      param[parampos++] = '\0';
+      parse_command(instance_data, cmd, cmdpos, param, parampos);
+      reading_cmd = false;
+      reading_param = false;
+      break;
+   case '=':
+      if (!reading_cmd)
+         break;
+      reading_param = true;
+      break;
+   default:
+      if (!reading_cmd)
+         break;
+
+      if (reading_param) {
+         /* overflow means an invalid parameter */
+         if (parampos >= BUFSIZE - 1) {
+            reading_cmd = false;
+            reading_param = false;
+            break;
+         }
+
+         param[parampos++] = c;
+      } else {
+         /* overflow means an invalid command */
+         if (cmdpos >= BUFSIZE - 1) {
+            reading_cmd = false;
+            break;
+         }
+
+         cmd[cmdpos++] = c;
+      }
+   }
+}
+
+static void control_send(struct instance_data *instance_data,
+                         const char *cmd, unsigned cmdlen,
+                         const char *param, unsigned paramlen)
+{
+   unsigned msglen = 0;
+   char buffer[BUFSIZE];
+
+   assert(cmdlen + paramlen + 3 < BUFSIZE);
+
+   buffer[msglen++] = ':';
+
+   memcpy(&buffer[msglen], cmd, cmdlen);
+   msglen += cmdlen;
+
+   if (paramlen > 0) {
+      buffer[msglen++] = '=';
+      memcpy(&buffer[msglen], param, paramlen);
+      msglen += paramlen;
+   }
+   /* Every message is terminated by ';', with or without a parameter. */
+   buffer[msglen++] = ';';
+
+   os_socket_send(instance_data->control_client, buffer, msglen, 0);
+}
+
+static void control_send_connection_string(struct device_data *device_data)
+{
+   struct instance_data *instance_data = device_data->instance;
+
+   const char *controlVersionCmd = "MesaOverlayControlVersion";
+   const char *controlVersionString = "1";
+
+   control_send(instance_data, controlVersionCmd, strlen(controlVersionCmd),
+                controlVersionString, strlen(controlVersionString));
+
+   const char *deviceCmd = "DeviceName";
+   const char *deviceName = device_data->properties.deviceName;
+
+   control_send(instance_data, deviceCmd, strlen(deviceCmd),
+                deviceName, strlen(deviceName));
+
+   const char *mesaVersionCmd = "MesaVersion";
+   const char *mesaVersionString = "Mesa " PACKAGE_VERSION;
+
+   control_send(instance_data, mesaVersionCmd, strlen(mesaVersionCmd),
+                mesaVersionString, strlen(mesaVersionString));
+}
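+/* [Editor's note] The layer serves at most one control client at a time:
+ * params.control is the listening socket and control_client the accepted
+ * connection (or -1). The accepted socket is switched to non-blocking mode
+ * so that the per-frame polling in process_control_socket() never stalls
+ * the render loop. */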
+static void control_client_check(struct device_data *device_data)
+{
+   struct instance_data *instance_data = device_data->instance;
+
+   /* Already connected, just return. */
+   if (instance_data->control_client >= 0)
+      return;
+
+   int socket = os_socket_accept(instance_data->params.control);
+   if (socket == -1) {
+      if (errno != EAGAIN && errno != EWOULDBLOCK && errno != ECONNABORTED)
+         fprintf(stderr, "ERROR on socket: %s\n", strerror(errno));
+      return;
+   }
+
+   if (socket >= 0) {
+      os_socket_block(socket, false);
+      instance_data->control_client = socket;
+      control_send_connection_string(device_data);
+   }
+}
+
+static void control_client_disconnected(struct instance_data *instance_data)
+{
+   os_socket_close(instance_data->control_client);
+   instance_data->control_client = -1;
+}
+
+static void process_control_socket(struct instance_data *instance_data)
+{
+   const int client = instance_data->control_client;
+   if (client >= 0) {
+      char buf[BUFSIZE];
+
+      while (true) {
+         ssize_t n = os_socket_recv(client, buf, BUFSIZE, 0);
+
+         if (n == -1) {
+            if (errno == EAGAIN || errno == EWOULDBLOCK) {
+               /* nothing to read, try again later */
+               break;
+            }
+
+            if (errno != ECONNRESET)
+               fprintf(stderr, "ERROR on connection: %s\n", strerror(errno));
+
+            control_client_disconnected(instance_data);
+         } else if (n == 0) {
+            /* recv() returns 0 when the client disconnects */
+            control_client_disconnected(instance_data);
+         }
+
+         for (ssize_t i = 0; i < n; i++) {
+            process_char(instance_data, buf[i]);
+         }
+
+         /* If we try to read BUFSIZE and receive BUFSIZE bytes from the
+          * socket, there's a good chance that there's still more data to be
+          * read, so we will try again. Otherwise, simply be done for this
+          * iteration and try again on the next frame.
+          */
+         if (n < BUFSIZE)
+            break;
+      }
+   }
+}
+
+static void snapshot_swapchain_frame(struct swapchain_data *data)
+{
+   struct device_data *device_data = data->device;
+   struct instance_data *instance_data = device_data->instance;
+   uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats);
+   uint64_t now = os_time_get(); /* us */
+
+   if (instance_data->params.control >= 0) {
+      control_client_check(device_data);
+      process_control_socket(instance_data);
+   }
+
+   double elapsed = (double)(now - data->last_fps_update); /* us */
+   elapsedF2 = (double)(now - last_f2_press);
+   elapsedF12 = (double)(now - last_f12_press);
+   fps = 1000000.0f * data->n_frames_since_update / elapsed;
+
+   if (data->last_present_time) {
+      data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] =
+         now - data->last_present_time;
+   }
+
+   memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx]));
+   for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+      data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
+      data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
+   }
+
+   /* Only allow the logging toggle when MANGOHUD_OUTPUT is set, and debounce
+    * the key for half a second. */
+   if (elapsedF2 >= 500000 && mangohud_output_env != NULL){
+      if (key_is_pressed(XK_F2)){
+         last_f2_press = now;
+         log_start = now;
+         loggingOn = !loggingOn;
+
+         if (loggingOn && log_period != 0)
+            pthread_create(&f2, NULL, &logging, NULL);
+      }
+   }
+
+   if (elapsedF12 >= 500000){
+      if (key_is_pressed(XK_F12)){
+         displayHud = !displayHud;
+         last_f12_press = now;
+      }
+   }
+
+   if (!sysInfoFetched) {
+      deviceName = device_data->properties.deviceName;
+      ram = exec("cat /proc/meminfo | grep 'MemTotal' | awk '{print $2}'");
+      cpu = exec("cat /proc/cpuinfo | grep 'model name' | tail -n1 | sed 's/^.*: //' | sed 's/([^)]*)/()/g' | tr -d '(/)'");
+      kernel = exec("uname -r");
+      os = exec("cat /etc/*-release | grep 'PRETTY_NAME' | cut -d '=' -f 2-");
+      os.erase(remove( os.begin(), os.end(), '\"'
),os.end()); + gpu = exec("lspci | grep VGA | head -n1 | awk -vRS=']' -vFS='[' '{print $2}' | sed '/^$/d' | tail -n1"); + driver = exec("glxinfo | grep 'OpenGL version' | sed 's/^.*: //' | cut -d' ' --output-delimiter=$'\n' -f1- | grep -v '(' | grep -v ')' | tr '\n' ' ' | cut -c 1-"); + ram.pop_back(); + cpu.pop_back(); + kernel.pop_back(); + os.pop_back(); + gpu.pop_back(); + driver.pop_back(); + + log_period = (log_period_env) ? std::stoi(log_period_env) : 100; + + if (log_period == 0) + out.open("/tmp/mango", ios::out | ios::app); + + if(duration_env) + duration = std::stoi(duration_env); + + coreCounting(); + if (deviceName.find("Radeon") != std::string::npos || deviceName.find("AMD") != std::string::npos) { + amdGpuFile = fopen("/sys/class/drm/card0/device/gpu_busy_percent", "r"); + string tempFolder = exec("ls /sys/class/drm/card0/device/hwmon/"); + tempFolder.pop_back(); + string tempLocation = "/sys/class/drm/card0/device/hwmon/" + tempFolder + "/temp1_input"; + amdTempFile = fopen(tempLocation.c_str(), "r"); + } + if (cpu.find("Intel") != std::string::npos){ + string cpuTempFolder = exec("ls /sys/devices/platform/coretemp.0/hwmon/"); + cpuTempFolder.pop_back(); + cpuTempLocation = "/sys/devices/platform/coretemp.0/hwmon/" + cpuTempFolder + "/temp1_input"; + cpuTempFile = fopen(cpuTempLocation.c_str(), "r"); + } else { + string name; + string path; + for (size_t i = 0; i < 10; i++) + { + path = "/sys/class/hwmon/hwmon" + to_string(i) + "/name"; + name = exec("cat " + path); + name.pop_back(); + if (name == "k10temp" || name == "zenpower"){ + cpuTempLocation = "/sys/class/hwmon/hwmon" + to_string(i) + "/temp1_input"; + break; + } + } + if (cpuTempLocation.empty()) { + cout << "MANGOHUD: Could not find temp location" << endl; + } else { + cpuTempFile = fopen(cpuTempLocation.c_str(), "r"); + } + } + // Adjust height for DXVK/VKD3D version number + if (engineName == "DXVK" || engineName == "VKD3D"){ + if (instance_data->params.font_size){ + instance_data->params.height += instance_data->params.font_size / 2; + } else { + instance_data->params.height += 24 / 2; + } + } + + sysInfoFetched = true; + } + + /* If capture has been enabled but it hasn't started yet, it means we are on + * the first snapshot after it has been enabled. At this point we want to + * use the stats captured so far to update the display, but we don't want + * this data to cause noise to the stats that we want to capture from now + * on. + * + * capture_begin == true will trigger an update of the fps on display, and a + * flush of the data, but no stats will be written to the output file. This + * way, we will have only stats from after the capture has been enabled + * written to the output_file. 
+ */ + const bool capture_begin = + instance_data->capture_enabled && !instance_data->capture_started; + + if (data->last_fps_update) { + if (capture_begin || + elapsed >= instance_data->params.fps_sampling_period) { + updateCpuStrings(); + pthread_create(&cpuThread, NULL, &getCpuUsage, NULL); + data->cpuString = cpuArray[0].output.c_str(); + pthread_create(&cpuInfoThread, NULL, &cpuInfo, NULL); + + // get gpu usage + if (deviceName.find("GeForce") != std::string::npos) + pthread_create(&nvidiaSmiThread, NULL, &queryNvidiaSmi, NULL); + + if (deviceName.find("Radeon") != std::string::npos || deviceName.find("AMD") != std::string::npos) + pthread_create(&gpuThread, NULL, &getAmdGpuUsage, NULL); + + // update variables for logging + cpuLoadLog = cpuArray[0].value; + gpuLoadLog = gpuLoad; + + data->frametimeDisplay = data->frametime; + data->fps = fps; + if (instance_data->capture_started) { + if (!instance_data->first_line_printed) { + bool first_column = true; + + instance_data->first_line_printed = true; + +#define OVERLAY_PARAM_BOOL(name) \ + if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_##name]) { \ + fprintf(instance_data->params.output_file, \ + "%s%s%s", first_column ? "" : ", ", #name, \ + param_unit(OVERLAY_PARAM_ENABLED_##name)); \ + first_column = false; \ + } +#define OVERLAY_PARAM_CUSTOM(name) + OVERLAY_PARAMS +#undef OVERLAY_PARAM_BOOL +#undef OVERLAY_PARAM_CUSTOM + fprintf(instance_data->params.output_file, "\n"); + } + + for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + if (!instance_data->params.enabled[s]) + continue; + if (s == OVERLAY_PARAM_ENABLED_fps) { + fprintf(instance_data->params.output_file, + "%s%.2f", s == 0 ? "" : ", ", data->fps); + } else { + fprintf(instance_data->params.output_file, + "%s%" PRIu64, s == 0 ? "" : ", ", + data->accumulated_stats.stats[s]); + } + } + fprintf(instance_data->params.output_file, "\n"); + fflush(instance_data->params.output_file); + } + + memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats)); + data->n_frames_since_update = 0; + data->last_fps_update = now; + + if (capture_begin) + instance_data->capture_started = true; + } + } else { + data->last_fps_update = now; + } + + memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats)); + memset(&data->frame_stats, 0, sizeof(device_data->frame_stats)); + + data->last_present_time = now; + data->n_frames++; + data->n_frames_since_update++; +} + +static float get_time_stat(void *_data, int _idx) +{ + struct swapchain_data *data = (struct swapchain_data *) _data; + if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) + return 0.0f; + int idx = ARRAY_SIZE(data->frames_stats) + + data->n_frames < ARRAY_SIZE(data->frames_stats) ? + _idx - data->n_frames : + _idx + data->n_frames; + idx %= ARRAY_SIZE(data->frames_stats); + /* Time stats are in us. */ + return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor; +} + +static float get_stat(void *_data, int _idx) +{ + struct swapchain_data *data = (struct swapchain_data *) _data; + if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) + return 0.0f; + int idx = ARRAY_SIZE(data->frames_stats) + + data->n_frames < ARRAY_SIZE(data->frames_stats) ? 
+      _idx - data->n_frames :
+      _idx + data->n_frames;
+   idx %= ARRAY_SIZE(data->frames_stats);
+   return data->frames_stats[idx].stats[data->stat_selector];
+}
+
+static void position_layer(struct swapchain_data *data)
+{
+   struct device_data *device_data = data->device;
+   struct instance_data *instance_data = device_data->instance;
+   float margin = 10.0f;
+   if (offset_x_env != NULL)
+      margin = 0.0f;
+
+   ImGui::SetNextWindowBgAlpha(0.5);
+   ImGui::SetNextWindowSize(ImVec2(instance_data->params.width, instance_data->params.height), ImGuiCond_Always);
+   ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.0f);
+
+   if (offset_x_env != NULL)
+      offset_x = std::stof(offset_x_env);
+
+   if (offset_y_env != NULL)
+      offset_y = std::stof(offset_y_env);
+
+   switch (instance_data->params.position) {
+   case LAYER_POSITION_TOP_LEFT:
+      ImGui::SetNextWindowPos(ImVec2(margin + offset_x, margin + offset_y), ImGuiCond_Always);
+      break;
+   case LAYER_POSITION_TOP_RIGHT:
+      ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, margin),
+                              ImGuiCond_Always);
+      break;
+   case LAYER_POSITION_BOTTOM_LEFT:
+      ImGui::SetNextWindowPos(ImVec2(margin, data->height - data->window_size.y - margin),
+                              ImGuiCond_Always);
+      break;
+   case LAYER_POSITION_BOTTOM_RIGHT:
+      ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin,
+                                     data->height - data->window_size.y - margin),
+                              ImGuiCond_Always);
+      break;
+   }
+}
+
+static void compute_swapchain_display(struct swapchain_data *data)
+{
+   struct device_data *device_data = data->device;
+   struct instance_data *instance_data = device_data->instance;
+
+   ImGui::SetCurrentContext(data->imgui_context);
+   ImGui::NewFrame();
+   position_layer(data);
+   if (instance_data->params.font_size > 0 && instance_data->params.width == 280)
+      instance_data->params.width = hudFirstRow + hudSecondRow;
+
+   if (displayHud) {
+      ImGui::Begin("Main", &open, ImGuiWindowFlags_NoDecoration);
+   } else {
+      /* Keep a nearly invisible window around so the context stays alive. */
+      ImGui::SetNextWindowBgAlpha(0.01);
+      ImGui::Begin("Main", &open, ImGuiWindowFlags_NoDecoration);
+   }
+
+   if (displayHud){
+      if (deviceName.find("GeForce") != std::string::npos || deviceName.find("Radeon") != std::string::npos || deviceName.find("AMD") != std::string::npos){
+         ImGui::TextColored(ImVec4(0.0, 0.502, 0.25, 1.00f), "GPU");
+         ImGui::SameLine(hudFirstRow);
+         ImGui::Text("%s%%", gpuLoadDisplay.c_str());
+         if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
+            ImGui::SameLine(hudSecondRow);
+            ImGui::Text("%i%s", gpuTemp, "°C");
+         }
+      }
+      ImGui::TextColored(ImVec4(0.0, 0.502, 0.753, 1.00f), "CPU");
+      ImGui::SameLine(hudFirstRow);
+      ImGui::Text("%d%%", cpuLoadLog);
+      if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_cpu_temp]){
+         ImGui::SameLine(hudSecondRow);
+         ImGui::Text("%i%s", cpuTemp, "°C");
+      }
+
+      if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_core_load]){
+         for (int i = 0; i < numCpuCores; i++)
+         {
+            ImGui::TextColored(ImVec4(0.0, 0.502, 0.753, 1.00f), "CPU");
+            ImGui::SameLine(0, 1.0f);
+            ImGui::PushFont(font1);
+            ImGui::TextColored(ImVec4(0.0, 0.502, 0.753, 1.00f),"%i", i);
+            ImGui::PopFont();
+            ImGui::SameLine(hudFirstRow);
+            ImGui::Text("%i%%", cpuArray[i + 1].value);
+            ImGui::SameLine(hudSecondRow);
+            ImGui::Text("%i", cpuArray[i + 1].freq);
ImGui::SameLine(0, 1.0f); + ImGui::PushFont(font1); + ImGui::Text("MHz"); + ImGui::PopFont(); + } + } + if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps]){ + int fpsLength = to_string(int(data->fps)).length(); + int msLength = to_string(1000 / data->fps).length(); + ImGui::TextColored(ImVec4(0.753, 0.502, 0.502, 1.00f), "%s", engineName.c_str()); + ImGui::SameLine(hudFirstRow); + ImGui::Text("%.0f", data->fps); + ImGui::SameLine(0, 1.0f); + ImGui::PushFont(font1); + ImGui::Text("FPS"); + ImGui::PopFont(); + ImGui::SameLine(hudSecondRow); + ImGui::Text("%.1f", 1000 / data->fps); + ImGui::SameLine(0, 1.0f); + ImGui::PushFont(font1); + ImGui::Text("ms"); + ImGui::PopFont(); + if (engineName == "DXVK" || engineName == "VKD3D"){ + ImGui::PushFont(font1); + ImGui::TextColored(ImVec4(0.753, 0.502, 0.502, 1.00f), "%s", engineVersion.c_str()); + ImGui::PopFont(); + } + } + + // ImGui::ProgressBar(float(0.5), ImVec2(ImGui::GetContentRegionAvailWidth(), 21), NULL); + ImGui::Dummy(ImVec2(0.0f, 20.0f)); + + if (loggingOn && log_period == 0){ + uint64_t now = os_time_get(); + elapsedLog = (double)(now - log_start); + if ((elapsedLog) >= duration * 1000000) + loggingOn = false; + + out << fps << "," << cpuLoadLog << "," << gpuLoadLog << "," << (now - log_start) << endl; + } + + /* Recompute min/max */ + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + data->stats_min.stats[s] = UINT64_MAX; + data->stats_max.stats[s] = 0; + } + for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) { + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s], + data->stats_min.stats[s]); + data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s], + data->stats_max.stats[s]); + } + } + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + assert(data->stats_min.stats[s] != UINT64_MAX); + } + + for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { + if (!instance_data->params.enabled[s] || + s == OVERLAY_PARAM_ENABLED_fps || + s == OVERLAY_PARAM_ENABLED_frame) + continue; + + char hash[40]; + snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]); + data->stat_selector = (enum overlay_param_enabled) s; + data->time_dividor = 1000.0f; + if (s == OVERLAY_PARAM_ENABLED_gpu_timing) + data->time_dividor = 1000000.0f; + + if (s == OVERLAY_PARAM_ENABLED_frame_timing || + s == OVERLAY_PARAM_ENABLED_acquire_timing || + s == OVERLAY_PARAM_ENABLED_present_timing || + s == OVERLAY_PARAM_ENABLED_gpu_timing) { + // double min_time = data->stats_min.stats[s] / data->time_dividor; + // double max_time = data->stats_max.stats[s] / data->time_dividor; + double min_time = 0.0f; + double max_time = 50.0f; + ImGui::PlotLines(hash, get_time_stat, data, + ARRAY_SIZE(data->frames_stats), 0, + NULL, min_time, max_time, + ImVec2(ImGui::GetContentRegionAvailWidth(), 50)); + // ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s], + // get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1), + // min_time, max_time); + } else { + ImGui::PlotHistogram(hash, get_stat, data, + ARRAY_SIZE(data->frames_stats), 0, + NULL, + data->stats_min.stats[s], + data->stats_max.stats[s], + ImVec2(ImGui::GetContentRegionAvailWidth(), 50)); + // ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s], + // get_stat(data, ARRAY_SIZE(data->frames_stats) - 1), + // data->stats_min.stats[s], data->stats_max.stats[s]); + } + } + data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f); 
+   }
+   ImGui::End();
+   if (loggingOn){
+      ImGui::SetNextWindowBgAlpha(0.01);
+      ImGui::SetNextWindowSize(ImVec2(200, 100), ImGuiCond_Always);
+      ImGui::SetNextWindowPos(ImVec2(data->width - 200, 0),
+                              ImGuiCond_Always);
+      ImGui::Begin("Logging", &open, ImGuiWindowFlags_NoDecoration);
+      ImGui::Text("Logging...");
+      ImGui::Text("Elapsed: %isec", int((elapsedLog) / 1000000));
+      ImGui::End();
+   }
+   ImGui::PopStyleVar();
+   ImGui::EndFrame();
+   ImGui::Render();
+}
+
+static uint32_t vk_memory_type(struct device_data *data,
+                               VkMemoryPropertyFlags properties,
+                               uint32_t type_bits)
+{
+   VkPhysicalDeviceMemoryProperties prop;
+   data->instance->vtable.GetPhysicalDeviceMemoryProperties(data->physical_device, &prop);
+   for (uint32_t i = 0; i < prop.memoryTypeCount; i++)
+      if ((prop.memoryTypes[i].propertyFlags & properties) == properties &&
+          type_bits & (1 << i))
+         return i;
+   return 0xFFFFFFFF; /* Unable to find memoryType */
+}
+
+static void ensure_swapchain_fonts(struct swapchain_data *data,
+                                   VkCommandBuffer command_buffer)
+{
+   if (data->font_uploaded)
+      return;
+
+   data->font_uploaded = true;
+
+   struct device_data *device_data = data->device;
+   ImGuiIO& io = ImGui::GetIO();
+   unsigned char* pixels;
+   int width, height;
+   io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height);
+   size_t upload_size = width * height * 4 * sizeof(char);
+
+   /* Upload buffer */
+   VkBufferCreateInfo buffer_info = {};
+   buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+   buffer_info.size = upload_size;
+   buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+   buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+   VK_CHECK(device_data->vtable.CreateBuffer(device_data->device, &buffer_info,
+                                             NULL, &data->upload_font_buffer));
+   VkMemoryRequirements upload_buffer_req;
+   device_data->vtable.GetBufferMemoryRequirements(device_data->device,
+                                                   data->upload_font_buffer,
+                                                   &upload_buffer_req);
+   VkMemoryAllocateInfo upload_alloc_info = {};
+   upload_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+   upload_alloc_info.allocationSize = upload_buffer_req.size;
+   upload_alloc_info.memoryTypeIndex = vk_memory_type(device_data,
+                                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+                                                      upload_buffer_req.memoryTypeBits);
+   VK_CHECK(device_data->vtable.AllocateMemory(device_data->device,
+                                               &upload_alloc_info,
+                                               NULL,
+                                               &data->upload_font_buffer_mem));
+   VK_CHECK(device_data->vtable.BindBufferMemory(device_data->device,
+                                                 data->upload_font_buffer,
+                                                 data->upload_font_buffer_mem, 0));
+
+   /* Upload to Buffer */
+   char* map = NULL;
+   VK_CHECK(device_data->vtable.MapMemory(device_data->device,
+                                          data->upload_font_buffer_mem,
+                                          0, upload_size, 0, (void**)(&map)));
+   memcpy(map, pixels, upload_size);
+   VkMappedMemoryRange range[1] = {};
+   range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+   range[0].memory = data->upload_font_buffer_mem;
+   range[0].size = upload_size;
+   VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range));
+   device_data->vtable.UnmapMemory(device_data->device,
+                                   data->upload_font_buffer_mem);
+
+   /* Copy buffer to image */
+   VkImageMemoryBarrier copy_barrier[1] = {};
+   copy_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+   copy_barrier[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+   copy_barrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+   copy_barrier[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+   copy_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+   copy_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+   copy_barrier[0].image = data->font_image;
+   copy_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+   copy_barrier[0].subresourceRange.levelCount = 1;
+   copy_barrier[0].subresourceRange.layerCount = 1;
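+   /* [Editor's note] Font upload sequence: the barrier below transitions the
+    * font image from UNDEFINED to TRANSFER_DST_OPTIMAL (host write to
+    * transfer), the staging buffer is then copied into the image, and a
+    * second barrier moves it to SHADER_READ_ONLY_OPTIMAL so the fragment
+    * shader can sample it. */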
+   device_data->vtable.CmdPipelineBarrier(command_buffer,
+                                          VK_PIPELINE_STAGE_HOST_BIT,
+                                          VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                          0, 0, NULL, 0, NULL,
+                                          1, copy_barrier);
+
+   VkBufferImageCopy region = {};
+   region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+   region.imageSubresource.layerCount = 1;
+   region.imageExtent.width = width;
+   region.imageExtent.height = height;
+   region.imageExtent.depth = 1;
+   device_data->vtable.CmdCopyBufferToImage(command_buffer,
+                                            data->upload_font_buffer,
+                                            data->font_image,
+                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                                            1, &region);
+
+   VkImageMemoryBarrier use_barrier[1] = {};
+   use_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+   use_barrier[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+   use_barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+   use_barrier[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+   use_barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+   use_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+   use_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+   use_barrier[0].image = data->font_image;
+   use_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+   use_barrier[0].subresourceRange.levelCount = 1;
+   use_barrier[0].subresourceRange.layerCount = 1;
+   device_data->vtable.CmdPipelineBarrier(command_buffer,
+                                          VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                          VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                                          0,
+                                          0, NULL,
+                                          0, NULL,
+                                          1, use_barrier);
+
+   /* Store our identifier */
+   io.Fonts->TexID = (ImTextureID)(intptr_t)data->font_image;
+}
+
+static void CreateOrResizeBuffer(struct device_data *data,
+                                 VkBuffer *buffer,
+                                 VkDeviceMemory *buffer_memory,
+                                 VkDeviceSize *buffer_size,
+                                 size_t new_size, VkBufferUsageFlagBits usage)
+{
+   if (*buffer != VK_NULL_HANDLE)
+      data->vtable.DestroyBuffer(data->device, *buffer, NULL);
+   if (*buffer_memory)
+      data->vtable.FreeMemory(data->device, *buffer_memory, NULL);
+
+   VkBufferCreateInfo buffer_info = {};
+   buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+   buffer_info.size = new_size;
+   buffer_info.usage = usage;
+   buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+   VK_CHECK(data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer));
+
+   VkMemoryRequirements req;
+   data->vtable.GetBufferMemoryRequirements(data->device, *buffer, &req);
+   VkMemoryAllocateInfo alloc_info = {};
+   alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+   alloc_info.allocationSize = req.size;
+   alloc_info.memoryTypeIndex =
+      vk_memory_type(data, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, req.memoryTypeBits);
+   VK_CHECK(data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory));
+
+   VK_CHECK(data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0));
+   *buffer_size = new_size;
+}
+
+static struct overlay_draw *render_swapchain_display(struct swapchain_data *data,
+                                                     struct queue_data *present_queue,
+                                                     const VkSemaphore *wait_semaphores,
+                                                     unsigned n_wait_semaphores,
+                                                     unsigned image_index)
+{
+   ImDrawData* draw_data = ImGui::GetDrawData();
+   if (draw_data->TotalVtxCount == 0)
+      return NULL;
+
+   struct device_data *device_data = data->device;
+   struct overlay_draw *draw = get_overlay_draw(data);
+
+   device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0);
+
+   VkRenderPassBeginInfo render_pass_info = {};
+   render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+   render_pass_info.renderPass = data->render_pass;
+   render_pass_info.framebuffer = data->framebuffers[image_index];
render_pass_info.renderArea.extent.width = data->width; + render_pass_info.renderArea.extent.height = data->height; + + VkCommandBufferBeginInfo buffer_begin_info = {}; + buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + + device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info); + + ensure_swapchain_fonts(data, draw->command_buffer); + + /* Bounce the image to display back to color attachment layout for + * rendering on top of it. + */ + VkImageMemoryBarrier imb; + imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imb.pNext = nullptr; + imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + imb.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + imb.image = data->images[image_index]; + imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imb.subresourceRange.baseMipLevel = 0; + imb.subresourceRange.levelCount = 1; + imb.subresourceRange.baseArrayLayer = 0; + imb.subresourceRange.layerCount = 1; + imb.srcQueueFamilyIndex = present_queue->family_index; + imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index; + device_data->vtable.CmdPipelineBarrier(draw->command_buffer, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + 0, /* dependency flags */ + 0, nullptr, /* memory barriers */ + 0, nullptr, /* buffer memory barriers */ + 1, &imb); /* image memory barriers */ + + device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info, + VK_SUBPASS_CONTENTS_INLINE); + + /* Create/Resize vertex & index buffers */ + size_t vertex_size = draw_data->TotalVtxCount * sizeof(ImDrawVert); + size_t index_size = draw_data->TotalIdxCount * sizeof(ImDrawIdx); + if (draw->vertex_buffer_size < vertex_size) { + CreateOrResizeBuffer(device_data, + &draw->vertex_buffer, + &draw->vertex_buffer_mem, + &draw->vertex_buffer_size, + vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); + } + if (draw->index_buffer_size < index_size) { + CreateOrResizeBuffer(device_data, + &draw->index_buffer, + &draw->index_buffer_mem, + &draw->index_buffer_size, + index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); + } + + /* Upload vertex & index data */ + ImDrawVert* vtx_dst = NULL; + ImDrawIdx* idx_dst = NULL; + VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem, + 0, vertex_size, 0, (void**)(&vtx_dst))); + VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem, + 0, index_size, 0, (void**)(&idx_dst))); + for (int n = 0; n < draw_data->CmdListsCount; n++) + { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert)); + memcpy(idx_dst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx)); + vtx_dst += cmd_list->VtxBuffer.Size; + idx_dst += cmd_list->IdxBuffer.Size; + } + VkMappedMemoryRange range[2] = {}; + range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range[0].memory = draw->vertex_buffer_mem; + range[0].size = VK_WHOLE_SIZE; + range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range[1].memory = draw->index_buffer_mem; + range[1].size = VK_WHOLE_SIZE; + VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range)); + device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem); + device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem); + + /* Bind pipeline and 
descriptor sets */ + device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline); + VkDescriptorSet desc_set[1] = { data->descriptor_set }; + device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + data->pipeline_layout, 0, 1, desc_set, 0, NULL); + + /* Bind vertex & index buffers */ + VkBuffer vertex_buffers[1] = { draw->vertex_buffer }; + VkDeviceSize vertex_offset[1] = { 0 }; + device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset); + device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, draw->index_buffer, 0, VK_INDEX_TYPE_UINT16); + + /* Setup viewport */ + VkViewport viewport; + viewport.x = 0; + viewport.y = 0; + viewport.width = draw_data->DisplaySize.x; + viewport.height = draw_data->DisplaySize.y; + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport); + + + /* Setup scale and translation through push constants : + * + * Our visible imgui space lies from draw_data->DisplayPos (top left) to + * draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayMin + * is typically (0,0) for single viewport apps. + */ + float scale[2]; + scale[0] = 2.0f / draw_data->DisplaySize.x; + scale[1] = 2.0f / draw_data->DisplaySize.y; + float translate[2]; + translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0]; + translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1]; + device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, + VK_SHADER_STAGE_VERTEX_BIT, + sizeof(float) * 0, sizeof(float) * 2, scale); + device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, + VK_SHADER_STAGE_VERTEX_BIT, + sizeof(float) * 2, sizeof(float) * 2, translate); + + // Render the command lists: + int vtx_offset = 0; + int idx_offset = 0; + ImVec2 display_pos = draw_data->DisplayPos; + for (int n = 0; n < draw_data->CmdListsCount; n++) + { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) + { + const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; + // Apply scissor/clipping rectangle + // FIXME: We could clamp width/height based on clamped min/max values. + VkRect2D scissor; + scissor.offset.x = (int32_t)(pcmd->ClipRect.x - display_pos.x) > 0 ? (int32_t)(pcmd->ClipRect.x - display_pos.x) : 0; + scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0; + scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x); + scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here? + device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor); + + // Draw + device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0); + + idx_offset += pcmd->ElemCount; + } + vtx_offset += cmd_list->VtxBuffer.Size; + } + + device_data->vtable.CmdEndRenderPass(draw->command_buffer); + + /* Bounce the image to display back to present layout. 
*/ + imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imb.pNext = nullptr; + imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imb.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + imb.image = data->images[image_index]; + imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imb.subresourceRange.baseMipLevel = 0; + imb.subresourceRange.levelCount = 1; + imb.subresourceRange.baseArrayLayer = 0; + imb.subresourceRange.layerCount = 1; + imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index; + imb.dstQueueFamilyIndex = present_queue->family_index; + device_data->vtable.CmdPipelineBarrier(draw->command_buffer, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + 0, /* dependency flags */ + 0, nullptr, /* memory barriers */ + 0, nullptr, /* buffer memory barriers */ + 1, &imb); /* image memory barriers */ + + device_data->vtable.EndCommandBuffer(draw->command_buffer); + + VkSubmitInfo submit_info = {}; + VkPipelineStageFlags stage_wait = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &draw->command_buffer; + submit_info.pWaitDstStageMask = &stage_wait; + submit_info.waitSemaphoreCount = n_wait_semaphores; + submit_info.pWaitSemaphores = wait_semaphores; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &draw->semaphore; + + device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence); + + return draw; +} + +static const uint32_t overlay_vert_spv[] = { +#include "overlay.vert.spv.h" +}; +static const uint32_t overlay_frag_spv[] = { +#include "overlay.frag.spv.h" +}; + +static void setup_swapchain_data_pipeline(struct swapchain_data *data) +{ + struct device_data *device_data = data->device; + VkShaderModule vert_module, frag_module; + + /* Create shader modules */ + VkShaderModuleCreateInfo vert_info = {}; + vert_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + vert_info.codeSize = sizeof(overlay_vert_spv); + vert_info.pCode = overlay_vert_spv; + VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, + &vert_info, NULL, &vert_module)); + VkShaderModuleCreateInfo frag_info = {}; + frag_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + frag_info.codeSize = sizeof(overlay_frag_spv); + frag_info.pCode = (uint32_t*)overlay_frag_spv; + VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, + &frag_info, NULL, &frag_module)); + + /* Font sampler */ + VkSamplerCreateInfo sampler_info = {}; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.minLod = -1000; + sampler_info.maxLod = 1000; + sampler_info.maxAnisotropy = 1.0f; + VK_CHECK(device_data->vtable.CreateSampler(device_data->device, &sampler_info, + NULL, &data->font_sampler)); + + /* Descriptor pool */ + VkDescriptorPoolSize sampler_pool_size = {}; + sampler_pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + sampler_pool_size.descriptorCount = 1; + VkDescriptorPoolCreateInfo desc_pool_info = 
{}; + desc_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + desc_pool_info.maxSets = 1; + desc_pool_info.poolSizeCount = 1; + desc_pool_info.pPoolSizes = &sampler_pool_size; + VK_CHECK(device_data->vtable.CreateDescriptorPool(device_data->device, + &desc_pool_info, + NULL, &data->descriptor_pool)); + + /* Descriptor layout */ + VkSampler sampler[1] = { data->font_sampler }; + VkDescriptorSetLayoutBinding binding[1] = {}; + binding[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + binding[0].descriptorCount = 1; + binding[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + binding[0].pImmutableSamplers = sampler; + VkDescriptorSetLayoutCreateInfo set_layout_info = {}; + set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + set_layout_info.bindingCount = 1; + set_layout_info.pBindings = binding; + VK_CHECK(device_data->vtable.CreateDescriptorSetLayout(device_data->device, + &set_layout_info, + NULL, &data->descriptor_layout)); + + /* Descriptor set */ + VkDescriptorSetAllocateInfo alloc_info = {}; + alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + alloc_info.descriptorPool = data->descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &data->descriptor_layout; + VK_CHECK(device_data->vtable.AllocateDescriptorSets(device_data->device, + &alloc_info, + &data->descriptor_set)); + + /* Constants: we are using 'vec2 offset' and 'vec2 scale' instead of a full + * 3d projection matrix + */ + VkPushConstantRange push_constants[1] = {}; + push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + push_constants[0].offset = sizeof(float) * 0; + push_constants[0].size = sizeof(float) * 4; + VkPipelineLayoutCreateInfo layout_info = {}; + layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + layout_info.setLayoutCount = 1; + layout_info.pSetLayouts = &data->descriptor_layout; + layout_info.pushConstantRangeCount = 1; + layout_info.pPushConstantRanges = push_constants; + VK_CHECK(device_data->vtable.CreatePipelineLayout(device_data->device, + &layout_info, + NULL, &data->pipeline_layout)); + + VkPipelineShaderStageCreateInfo stage[2] = {}; + stage[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + stage[0].module = vert_module; + stage[0].pName = "main"; + stage[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + stage[1].module = frag_module; + stage[1].pName = "main"; + + VkVertexInputBindingDescription binding_desc[1] = {}; + binding_desc[0].stride = sizeof(ImDrawVert); + binding_desc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + + VkVertexInputAttributeDescription attribute_desc[3] = {}; + attribute_desc[0].location = 0; + attribute_desc[0].binding = binding_desc[0].binding; + attribute_desc[0].format = VK_FORMAT_R32G32_SFLOAT; + attribute_desc[0].offset = IM_OFFSETOF(ImDrawVert, pos); + attribute_desc[1].location = 1; + attribute_desc[1].binding = binding_desc[0].binding; + attribute_desc[1].format = VK_FORMAT_R32G32_SFLOAT; + attribute_desc[1].offset = IM_OFFSETOF(ImDrawVert, uv); + attribute_desc[2].location = 2; + attribute_desc[2].binding = binding_desc[0].binding; + attribute_desc[2].format = VK_FORMAT_R8G8B8A8_UNORM; + attribute_desc[2].offset = IM_OFFSETOF(ImDrawVert, col); + + VkPipelineVertexInputStateCreateInfo vertex_info = {}; + vertex_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_info.vertexBindingDescriptionCount = 
1; + vertex_info.pVertexBindingDescriptions = binding_desc; + vertex_info.vertexAttributeDescriptionCount = 3; + vertex_info.pVertexAttributeDescriptions = attribute_desc; + + VkPipelineInputAssemblyStateCreateInfo ia_info = {}; + ia_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + VkPipelineViewportStateCreateInfo viewport_info = {}; + viewport_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_info.viewportCount = 1; + viewport_info.scissorCount = 1; + + VkPipelineRasterizationStateCreateInfo raster_info = {}; + raster_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + raster_info.polygonMode = VK_POLYGON_MODE_FILL; + raster_info.cullMode = VK_CULL_MODE_NONE; + raster_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + raster_info.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo ms_info = {}; + ms_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + VkPipelineColorBlendAttachmentState color_attachment[1] = {}; + color_attachment[0].blendEnable = VK_TRUE; + color_attachment[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + color_attachment[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + color_attachment[0].colorBlendOp = VK_BLEND_OP_ADD; + color_attachment[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + color_attachment[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + color_attachment[0].alphaBlendOp = VK_BLEND_OP_ADD; + color_attachment[0].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineDepthStencilStateCreateInfo depth_info = {}; + depth_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + + VkPipelineColorBlendStateCreateInfo blend_info = {}; + blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + blend_info.attachmentCount = 1; + blend_info.pAttachments = color_attachment; + + VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; + VkPipelineDynamicStateCreateInfo dynamic_state = {}; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.dynamicStateCount = (uint32_t)IM_ARRAYSIZE(dynamic_states); + dynamic_state.pDynamicStates = dynamic_states; + + VkGraphicsPipelineCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + info.flags = 0; + info.stageCount = 2; + info.pStages = stage; + info.pVertexInputState = &vertex_info; + info.pInputAssemblyState = &ia_info; + info.pViewportState = &viewport_info; + info.pRasterizationState = &raster_info; + info.pMultisampleState = &ms_info; + info.pDepthStencilState = &depth_info; + info.pColorBlendState = &blend_info; + info.pDynamicState = &dynamic_state; + info.layout = data->pipeline_layout; + info.renderPass = data->render_pass; + VK_CHECK( + device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE, + 1, &info, + NULL, &data->pipeline)); + + device_data->vtable.DestroyShaderModule(device_data->device, vert_module, NULL); + device_data->vtable.DestroyShaderModule(device_data->device, frag_module, NULL); + + ImGuiIO& io = ImGui::GetIO(); + unsigned char* pixels; + int width, height; + io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); + + /* Font image */ + VkImageCreateInfo image_info = {}; + image_info.sType = 
VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_info.extent.width = width; + image_info.extent.height = height; + image_info.extent.depth = 1; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VK_CHECK(device_data->vtable.CreateImage(device_data->device, &image_info, + NULL, &data->font_image)); + VkMemoryRequirements font_image_req; + device_data->vtable.GetImageMemoryRequirements(device_data->device, + data->font_image, &font_image_req); + VkMemoryAllocateInfo image_alloc_info = {}; + image_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + image_alloc_info.allocationSize = font_image_req.size; + image_alloc_info.memoryTypeIndex = vk_memory_type(device_data, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + font_image_req.memoryTypeBits); + VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info, + NULL, &data->font_mem)); + VK_CHECK(device_data->vtable.BindImageMemory(device_data->device, + data->font_image, + data->font_mem, 0)); + + /* Font image view */ + VkImageViewCreateInfo view_info = {}; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.image = data->font_image; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.format = VK_FORMAT_R8G8B8A8_UNORM; + view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view_info.subresourceRange.levelCount = 1; + view_info.subresourceRange.layerCount = 1; + VK_CHECK(device_data->vtable.CreateImageView(device_data->device, &view_info, + NULL, &data->font_image_view)); + + /* Descriptor set */ + VkDescriptorImageInfo desc_image[1] = {}; + desc_image[0].sampler = data->font_sampler; + desc_image[0].imageView = data->font_image_view; + desc_image[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + VkWriteDescriptorSet write_desc[1] = {}; + write_desc[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_desc[0].dstSet = data->descriptor_set; + write_desc[0].descriptorCount = 1; + write_desc[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write_desc[0].pImageInfo = desc_image; + device_data->vtable.UpdateDescriptorSets(device_data->device, 1, write_desc, 0, NULL); +} + +static void setup_swapchain_data(struct swapchain_data *data, + const VkSwapchainCreateInfoKHR *pCreateInfo) +{ + data->width = pCreateInfo->imageExtent.width; + data->height = pCreateInfo->imageExtent.height; + data->format = pCreateInfo->imageFormat; + + data->imgui_context = ImGui::CreateContext(); + ImGui::SetCurrentContext(data->imgui_context); + + ImGui::GetIO().IniFilename = NULL; + ImGui::GetIO().DisplaySize = ImVec2((float)data->width, (float)data->height); + + struct device_data *device_data = data->device; + + /* Render pass */ + VkAttachmentDescription attachment_desc = {}; + attachment_desc.format = pCreateInfo->imageFormat; + attachment_desc.samples = VK_SAMPLE_COUNT_1_BIT; + attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment_desc.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 
+ attachment_desc.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + VkAttachmentReference color_attachment = {}; + color_attachment.attachment = 0; + color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkSubpassDescription subpass = {}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &color_attachment; + VkSubpassDependency dependency = {}; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.srcAccessMask = 0; + dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkRenderPassCreateInfo render_pass_info = {}; + render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_info.attachmentCount = 1; + render_pass_info.pAttachments = &attachment_desc; + render_pass_info.subpassCount = 1; + render_pass_info.pSubpasses = &subpass; + render_pass_info.dependencyCount = 1; + render_pass_info.pDependencies = &dependency; + VK_CHECK(device_data->vtable.CreateRenderPass(device_data->device, + &render_pass_info, + NULL, &data->render_pass)); + + setup_swapchain_data_pipeline(data); + + VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, + data->swapchain, + &data->n_images, + NULL)); + + data->images = ralloc_array(data, VkImage, data->n_images); + data->image_views = ralloc_array(data, VkImageView, data->n_images); + data->framebuffers = ralloc_array(data, VkFramebuffer, data->n_images); + + VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, + data->swapchain, + &data->n_images, + data->images)); + + /* Image views */ + VkImageViewCreateInfo view_info = {}; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.format = pCreateInfo->imageFormat; + view_info.components.r = VK_COMPONENT_SWIZZLE_R; + view_info.components.g = VK_COMPONENT_SWIZZLE_G; + view_info.components.b = VK_COMPONENT_SWIZZLE_B; + view_info.components.a = VK_COMPONENT_SWIZZLE_A; + view_info.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + for (uint32_t i = 0; i < data->n_images; i++) { + view_info.image = data->images[i]; + VK_CHECK(device_data->vtable.CreateImageView(device_data->device, + &view_info, NULL, + &data->image_views[i])); + } + + /* Framebuffers */ + VkImageView attachment[1]; + VkFramebufferCreateInfo fb_info = {}; + fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fb_info.renderPass = data->render_pass; + fb_info.attachmentCount = 1; + fb_info.pAttachments = attachment; + fb_info.width = data->width; + fb_info.height = data->height; + fb_info.layers = 1; + for (uint32_t i = 0; i < data->n_images; i++) { + attachment[0] = data->image_views[i]; + VK_CHECK(device_data->vtable.CreateFramebuffer(device_data->device, &fb_info, + NULL, &data->framebuffers[i])); + } + + /* Command buffer pool */ + VkCommandPoolCreateInfo cmd_buffer_pool_info = {}; + cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + cmd_buffer_pool_info.queueFamilyIndex = device_data->graphic_queue->family_index; + VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device, + &cmd_buffer_pool_info, + NULL, &data->command_pool)); +} + +static void shutdown_swapchain_data(struct swapchain_data *data) +{ + 
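/* [Editor's note] Everything created in setup_swapchain_data() and
+    * get_overlay_draw() is released here, roughly in reverse creation order:
+    * per-draw semaphores, fences and buffers, then per-image views and
+    * framebuffers, then the render pass, command pool, pipeline, descriptor
+    * objects, font objects and finally the ImGui context. */
+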
struct device_data *device_data = data->device; + + list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) { + device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL); + device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL); + device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL); + device_data->vtable.DestroyBuffer(device_data->device, draw->index_buffer, NULL); + device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL); + device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL); + } + + for (uint32_t i = 0; i < data->n_images; i++) { + device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL); + device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL); + } + + device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL); + + device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL); + + device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL); + device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL); + + device_data->vtable.DestroyDescriptorPool(device_data->device, + data->descriptor_pool, NULL); + device_data->vtable.DestroyDescriptorSetLayout(device_data->device, + data->descriptor_layout, NULL); + + device_data->vtable.DestroySampler(device_data->device, data->font_sampler, NULL); + device_data->vtable.DestroyImageView(device_data->device, data->font_image_view, NULL); + device_data->vtable.DestroyImage(device_data->device, data->font_image, NULL); + device_data->vtable.FreeMemory(device_data->device, data->font_mem, NULL); + + device_data->vtable.DestroyBuffer(device_data->device, data->upload_font_buffer, NULL); + device_data->vtable.FreeMemory(device_data->device, data->upload_font_buffer_mem, NULL); + + ImGui::DestroyContext(data->imgui_context); +} + +static struct overlay_draw *before_present(struct swapchain_data *swapchain_data, + struct queue_data *present_queue, + const VkSemaphore *wait_semaphores, + unsigned n_wait_semaphores, + unsigned imageIndex) +{ + struct instance_data *instance_data = swapchain_data->device->instance; + struct overlay_draw *draw = NULL; + + snapshot_swapchain_frame(swapchain_data); + + if (!instance_data->params.no_display && swapchain_data->n_frames > 0) { + compute_swapchain_display(swapchain_data); + draw = render_swapchain_display(swapchain_data, present_queue, + wait_semaphores, n_wait_semaphores, + imageIndex); + } + + return draw; +} + +static VkResult overlay_CreateSwapchainKHR( + VkDevice device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain) +{ + struct device_data *device_data = FIND(struct device_data, device); + VkResult result = device_data->vtable.CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain); + if (result != VK_SUCCESS) return result; + + struct swapchain_data *swapchain_data = new_swapchain_data(*pSwapchain, device_data); + setup_swapchain_data(swapchain_data, pCreateInfo); + return result; +} + +static void overlay_DestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR swapchain, + const VkAllocationCallbacks* pAllocator) +{ + struct swapchain_data *swapchain_data = + FIND(struct swapchain_data, swapchain); + + shutdown_swapchain_data(swapchain_data); + swapchain_data->device->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); 
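+   /* [Editor's note] Teardown order matters here: shutdown_swapchain_data()
+    * above destroyed the overlay's own objects while the swapchain was still
+    * valid, and only after the driver's DestroySwapchainKHR() has returned is
+    * the tracking entry unmapped and freed below. */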
+   destroy_swapchain_data(swapchain_data);
+}
+
+static VkResult overlay_QueuePresentKHR(
+    VkQueue                                     queue,
+    const VkPresentInfoKHR*                     pPresentInfo)
+{
+   struct queue_data *queue_data = FIND(struct queue_data, queue);
+   struct device_data *device_data = queue_data->device;
+   struct instance_data *instance_data = device_data->instance;
+   uint32_t query_results[OVERLAY_QUERY_COUNT];
+
+   device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++;
+
+   if (list_length(&queue_data->running_command_buffer) > 0) {
+      /* Before getting the query results, make sure the operations have
+       * completed.
+       */
+      VK_CHECK(device_data->vtable.ResetFences(device_data->device,
+                                               1, &queue_data->queries_fence));
+      VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence));
+      VK_CHECK(device_data->vtable.WaitForFences(device_data->device,
+                                                 1, &queue_data->queries_fence,
+                                                 VK_FALSE, UINT64_MAX));
+
+      /* Now get the results. */
+      list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data,
+                               &queue_data->running_command_buffer, link) {
+         list_delinit(&cmd_buffer_data->link);
+
+         if (cmd_buffer_data->pipeline_query_pool) {
+            memset(query_results, 0, sizeof(query_results));
+            VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
+                                                             cmd_buffer_data->pipeline_query_pool,
+                                                             cmd_buffer_data->query_index, 1,
+                                                             sizeof(uint32_t) * OVERLAY_QUERY_COUNT,
+                                                             query_results, 0, VK_QUERY_RESULT_WAIT_BIT));
+
+            for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices;
+                 i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
+               device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices];
+            }
+         }
+         if (cmd_buffer_data->timestamp_query_pool) {
+            uint64_t gpu_timestamps[2] = { 0 };
+            VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
+                                                             cmd_buffer_data->timestamp_query_pool,
+                                                             cmd_buffer_data->query_index * 2, 2,
+                                                             2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t),
+                                                             VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT));
+
+            gpu_timestamps[0] &= queue_data->timestamp_mask;
+            gpu_timestamps[1] &= queue_data->timestamp_mask;
+            device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] +=
+               (gpu_timestamps[1] - gpu_timestamps[0]) *
+               device_data->properties.limits.timestampPeriod;
+         }
+      }
+   }
+
+   /* When the hud is displayed, we need to add our overlay drawing
+    * semaphore to the list of semaphores to wait on. If we don't do that,
+    * the presented picture might have incomplete overlay drawings.
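+    * (The overlay draw is submitted by before_present() and signals its
+    * semaphore once the hud has been rendered on top of the frame.)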
+    */
+   VkResult result = VK_SUCCESS;
+   if (instance_data->params.no_display) {
+      for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
+         VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
+         struct swapchain_data *swapchain_data =
+            FIND(struct swapchain_data, swapchain);
+
+         before_present(swapchain_data,
+                        queue_data,
+                        pPresentInfo->pWaitSemaphores,
+                        pPresentInfo->waitSemaphoreCount,
+                        pPresentInfo->pImageIndices[i]);
+
+         VkPresentInfoKHR present_info = *pPresentInfo;
+         present_info.swapchainCount = 1;
+         present_info.pSwapchains = &swapchain;
+
+         uint64_t ts0 = os_time_get();
+         result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
+         uint64_t ts1 = os_time_get();
+         swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0;
+      }
+   } else {
+      for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
+         VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
+         struct swapchain_data *swapchain_data =
+            FIND(struct swapchain_data, swapchain);
+         VkPresentInfoKHR present_info = *pPresentInfo;
+         present_info.swapchainCount = 1;
+         present_info.pSwapchains = &swapchain;
+
+         uint32_t image_index = pPresentInfo->pImageIndices[i];
+
+         struct overlay_draw *draw = before_present(swapchain_data,
+                                                    queue_data,
+                                                    pPresentInfo->pWaitSemaphores,
+                                                    pPresentInfo->waitSemaphoreCount,
+                                                    image_index);
+
+         /* Because the submission of the overlay draw waits on the
+          * semaphores handed in for present, this present operation doesn't
+          * need to wait on them as well; it can just wait on the overlay
+          * submission semaphore.
+          */
+         present_info.pWaitSemaphores = &draw->semaphore;
+         present_info.waitSemaphoreCount = 1;
+
+         uint64_t ts0 = os_time_get();
+         VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
+         uint64_t ts1 = os_time_get();
+         swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0;
+         if (pPresentInfo->pResults)
+            pPresentInfo->pResults[i] = chain_result;
+         if (chain_result != VK_SUCCESS && result == VK_SUCCESS)
+            result = chain_result;
+      }
+   }
+   return result;
+}
+
+static VkResult overlay_AcquireNextImageKHR(
+    VkDevice                                    device,
+    VkSwapchainKHR                              swapchain,
+    uint64_t                                    timeout,
+    VkSemaphore                                 semaphore,
+    VkFence                                     fence,
+    uint32_t*                                   pImageIndex)
+{
+   struct swapchain_data *swapchain_data =
+      FIND(struct swapchain_data, swapchain);
+   struct device_data *device_data = swapchain_data->device;
+
+   uint64_t ts0 = os_time_get();
+   VkResult result = device_data->vtable.AcquireNextImageKHR(device, swapchain, timeout,
+                                                             semaphore, fence, pImageIndex);
+   uint64_t ts1 = os_time_get();
+
+   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
+
+   return result;
+}
+
+static VkResult overlay_AcquireNextImage2KHR(
+    VkDevice                                    device,
+    const VkAcquireNextImageInfoKHR*            pAcquireInfo,
+    uint32_t*                                   pImageIndex)
+{
+   struct swapchain_data *swapchain_data =
+      FIND(struct swapchain_data, pAcquireInfo->swapchain);
+   struct device_data *device_data = swapchain_data->device;
+
+   uint64_t ts0 = os_time_get();
+   VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex);
+   uint64_t ts1 = os_time_get();
+
+   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
+
+   return result;
+}
+
+static void overlay_CmdDraw(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    vertexCount,
+ uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount, + firstVertex, firstInstance); +} + +static void overlay_CmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount, + firstIndex, vertexOffset, firstInstance); +} + +static void overlay_CmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +static void overlay_CmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +static void overlay_CmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDrawIndirectCount(commandBuffer, buffer, offset, + countBuffer, countBufferOffset, + maxDrawCount, stride); +} + +static void overlay_CmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++; + struct device_data *device_data = cmd_buffer_data->device; + device_data->vtable.CmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, + countBuffer, countBufferOffset, + maxDrawCount, stride); +} + +static void overlay_CmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++; + struct device_data *device_data = cmd_buffer_data->device; + 
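+   /* Same pattern as the draw hooks above: bump the stat counter, then
+    * forward the call down the layer chain unchanged.
+    */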
device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
+}
+
+static void overlay_CmdDispatchIndirect(
+    VkCommandBuffer                             commandBuffer,
+    VkBuffer                                    buffer,
+    VkDeviceSize                                offset)
+{
+   struct command_buffer_data *cmd_buffer_data =
+      FIND(struct command_buffer_data, commandBuffer);
+   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++;
+   struct device_data *device_data = cmd_buffer_data->device;
+   device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset);
+}
+
+static void overlay_CmdBindPipeline(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineBindPoint                         pipelineBindPoint,
+    VkPipeline                                  pipeline)
+{
+   struct command_buffer_data *cmd_buffer_data =
+      FIND(struct command_buffer_data, commandBuffer);
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break;
+   case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break;
+   case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break;
+   default: break;
+   }
+   struct device_data *device_data = cmd_buffer_data->device;
+   device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
+}
+
+static VkResult overlay_BeginCommandBuffer(
+    VkCommandBuffer                             commandBuffer,
+    const VkCommandBufferBeginInfo*             pBeginInfo)
+{
+   struct command_buffer_data *cmd_buffer_data =
+      FIND(struct command_buffer_data, commandBuffer);
+   struct device_data *device_data = cmd_buffer_data->device;
+
+   memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats));
+
+   /* We don't record any query in secondary command buffers; just make sure
+    * we have the right inheritance.
+    */
+   if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+      VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *)
+         clone_chain((const struct VkBaseInStructure *)pBeginInfo);
+      VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *)
+         vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO);
+      VkCommandBufferInheritanceInfo inhe_info = {
+         VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
+         NULL,
+         VK_NULL_HANDLE,
+         0,
+         VK_NULL_HANDLE,
+         VK_FALSE,
+         0,
+         overlay_query_flags,
+      };
+
+      if (parent_inhe_info)
+         parent_inhe_info->pipelineStatistics = overlay_query_flags;
+      else {
+         inhe_info.pNext = begin_info->pNext;
+         begin_info->pNext = &inhe_info;
+      }
+
+      /* Pass the patched chain down so the driver sees the inheritance
+       * info we just added, not the application's original pBeginInfo.
+       */
+      VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, begin_info);
+
+      if (!parent_inhe_info)
+         begin_info->pNext = inhe_info.pNext;
+
+      free_chain((struct VkBaseOutStructure *)begin_info);
+
+      return result;
+   }
+
+   /* Otherwise record a begin query as first command.
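+    * The matching vkCmdEndQuery / closing timestamp is recorded in
+    * overlay_EndCommandBuffer().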
*/ + VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); + + if (result == VK_SUCCESS) { + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdResetQueryPool(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 1); + } + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdResetQueryPool(commandBuffer, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2, 2); + } + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdBeginQuery(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index, 0); + } + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdWriteTimestamp(commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2); + } + } + + return result; +} + +static VkResult overlay_EndCommandBuffer( + VkCommandBuffer commandBuffer) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + struct device_data *device_data = cmd_buffer_data->device; + + if (cmd_buffer_data->timestamp_query_pool) { + device_data->vtable.CmdWriteTimestamp(commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + cmd_buffer_data->timestamp_query_pool, + cmd_buffer_data->query_index * 2 + 1); + } + if (cmd_buffer_data->pipeline_query_pool) { + device_data->vtable.CmdEndQuery(commandBuffer, + cmd_buffer_data->pipeline_query_pool, + cmd_buffer_data->query_index); + } + + return device_data->vtable.EndCommandBuffer(commandBuffer); +} + +static VkResult overlay_ResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + struct device_data *device_data = cmd_buffer_data->device; + + memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); + + return device_data->vtable.ResetCommandBuffer(commandBuffer, flags); +} + +static void overlay_CmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, commandBuffer); + struct device_data *device_data = cmd_buffer_data->device; + + /* Add the stats of the executed command buffers to the primary one. 
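+    * Secondary command buffers don't record queries of their own (see
+    * overlay_BeginCommandBuffer), so rolling their counters into the
+    * primary here is how their stats reach the queue.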
+    */
+   for (uint32_t c = 0; c < commandBufferCount; c++) {
+      struct command_buffer_data *sec_cmd_buffer_data =
+         FIND(struct command_buffer_data, pCommandBuffers[c]);
+
+      for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++)
+         cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s];
+   }
+
+   device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers);
+}
+
+static VkResult overlay_AllocateCommandBuffers(
+    VkDevice                                    device,
+    const VkCommandBufferAllocateInfo*          pAllocateInfo,
+    VkCommandBuffer*                            pCommandBuffers)
+{
+   struct device_data *device_data = FIND(struct device_data, device);
+   VkResult result =
+      device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
+   if (result != VK_SUCCESS)
+      return result;
+
+   VkQueryPool pipeline_query_pool = VK_NULL_HANDLE;
+   VkQueryPool timestamp_query_pool = VK_NULL_HANDLE;
+   if (device_data->instance->pipeline_statistics_enabled &&
+       pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+      VkQueryPoolCreateInfo pool_info = {
+         VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+         NULL,
+         0,
+         VK_QUERY_TYPE_PIPELINE_STATISTICS,
+         pAllocateInfo->commandBufferCount,
+         overlay_query_flags,
+      };
+      VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
+                                                   NULL, &pipeline_query_pool));
+   }
+   if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) {
+      VkQueryPoolCreateInfo pool_info = {
+         VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+         NULL,
+         0,
+         VK_QUERY_TYPE_TIMESTAMP,
+         pAllocateInfo->commandBufferCount * 2,
+         0,
+      };
+      VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
+                                                   NULL, &timestamp_query_pool));
+   }
+
+   for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+      new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level,
+                              pipeline_query_pool, timestamp_query_pool,
+                              i, device_data);
+   }
+
+   if (pipeline_query_pool)
+      map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
+   if (timestamp_query_pool)
+      map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
+
+   return result;
+}
+
+static void overlay_FreeCommandBuffers(
+    VkDevice                                    device,
+    VkCommandPool                               commandPool,
+    uint32_t                                    commandBufferCount,
+    const VkCommandBuffer*                      pCommandBuffers)
+{
+   struct device_data *device_data = FIND(struct device_data, device);
+   for (uint32_t i = 0; i < commandBufferCount; i++) {
+      struct command_buffer_data *cmd_buffer_data =
+         FIND(struct command_buffer_data, pCommandBuffers[i]);
+
+      /* It is legal to free a NULL command buffer. */
+      if (!cmd_buffer_data)
+         continue;
+
+      /* The query pools are shared by all command buffers of an allocation;
+       * refcount them and destroy each pool when its last user is freed.
+       */
+      uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool));
+      if (count == 1) {
+         unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool));
+         device_data->vtable.DestroyQueryPool(device_data->device,
+                                              cmd_buffer_data->pipeline_query_pool, NULL);
+      } else if (count != 0) {
+         map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1));
+      }
+      count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool));
+      if (count == 1) {
+         unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool));
+         device_data->vtable.DestroyQueryPool(device_data->device,
+                                              cmd_buffer_data->timestamp_query_pool, NULL);
+      } else if (count != 0) {
+         map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1));
+      }
+      destroy_command_buffer_data(cmd_buffer_data);
+   }
+
+   device_data->vtable.FreeCommandBuffers(device,
commandPool, + commandBufferCount, pCommandBuffers); +} + +static VkResult overlay_QueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence) +{ + struct queue_data *queue_data = FIND(struct queue_data, queue); + struct device_data *device_data = queue_data->device; + + device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++; + + for (uint32_t s = 0; s < submitCount; s++) { + for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) { + struct command_buffer_data *cmd_buffer_data = + FIND(struct command_buffer_data, pSubmits[s].pCommandBuffers[c]); + + /* Merge the submitted command buffer stats into the device. */ + for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++) + device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st]; + + /* Attach the command buffer to the queue so we remember to read its + * pipeline statistics & timestamps at QueuePresent(). + */ + if (!cmd_buffer_data->pipeline_query_pool && + !cmd_buffer_data->timestamp_query_pool) + continue; + + if (list_is_empty(&cmd_buffer_data->link)) { + list_addtail(&cmd_buffer_data->link, + &queue_data->running_command_buffer); + } else { + fprintf(stderr, "Command buffer submitted multiple times before present.\n" + "This could lead to invalid data.\n"); + } + } + } + + return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence); +} + +static VkResult overlay_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice) +{ + struct instance_data *instance_data = + FIND(struct instance_data, physicalDevice); + VkLayerDeviceCreateInfo *chain_info = + get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); + + assert(chain_info->u.pLayerInfo); + PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkGetDeviceProcAddr fpGetDeviceProcAddr = chain_info->u.pLayerInfo->pfnNextGetDeviceProcAddr; + PFN_vkCreateDevice fpCreateDevice = (PFN_vkCreateDevice)fpGetInstanceProcAddr(NULL, "vkCreateDevice"); + if (fpCreateDevice == NULL) { + return VK_ERROR_INITIALIZATION_FAILED; + } + + // Advance the link info for the next element on the chain + chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; + + VkPhysicalDeviceFeatures device_features = {}; + VkDeviceCreateInfo device_info = *pCreateInfo; + + if (pCreateInfo->pEnabledFeatures) + device_features = *(pCreateInfo->pEnabledFeatures); + if (instance_data->pipeline_statistics_enabled) { + device_features.inheritedQueries = true; + device_features.pipelineStatisticsQuery = true; + } + device_info.pEnabledFeatures = &device_features; + + + VkResult result = fpCreateDevice(physicalDevice, &device_info, pAllocator, pDevice); + if (result != VK_SUCCESS) return result; + + struct device_data *device_data = new_device_data(*pDevice, instance_data); + device_data->physical_device = physicalDevice; + vk_load_device_commands(*pDevice, fpGetDeviceProcAddr, &device_data->vtable); + + instance_data->vtable.GetPhysicalDeviceProperties(device_data->physical_device, + &device_data->properties); + + VkLayerDeviceCreateInfo *load_data_info = + get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK); + device_data->set_device_loader_data = load_data_info->u.pfnSetDeviceLoaderData; + + device_map_queues(device_data, pCreateInfo); + + return result; +} + +static void overlay_DestroyDevice( + VkDevice device, + const VkAllocationCallbacks* pAllocator) +{ + struct 
device_data *device_data = FIND(struct device_data, device);
+   device_unmap_queues(device_data);
+   device_data->vtable.DestroyDevice(device, pAllocator);
+   destroy_device_data(device_data);
+}
+
+static VkResult overlay_CreateInstance(
+    const VkInstanceCreateInfo*                 pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkInstance*                                 pInstance)
+{
+   VkLayerInstanceCreateInfo *chain_info =
+      get_instance_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
+
+   /* pApplicationInfo and pEngineName are both optional; guard against
+    * NULL before assigning to the string.
+    */
+   if (pCreateInfo->pApplicationInfo && pCreateInfo->pApplicationInfo->pEngineName)
+      engineName = pCreateInfo->pApplicationInfo->pEngineName;
+   if (engineName == "DXVK" || engineName == "vkd3d") {
+      uint32_t engineVer = pCreateInfo->pApplicationInfo->engineVersion;
+      engineVersion = to_string(VK_VERSION_MAJOR(engineVer)) + "." +
+                      to_string(VK_VERSION_MINOR(engineVer)) + "." +
+                      to_string(VK_VERSION_PATCH(engineVer));
+   }
+
+   if (engineName != "DXVK" && engineName != "vkd3d" && engineName != "Feral3D")
+      engineName = "VULKAN";
+
+   if (engineName == "vkd3d")
+      engineName = "VKD3D";
+
+   assert(chain_info->u.pLayerInfo);
+   PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr =
+      chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+   PFN_vkCreateInstance fpCreateInstance =
+      (PFN_vkCreateInstance)fpGetInstanceProcAddr(NULL, "vkCreateInstance");
+   if (fpCreateInstance == NULL) {
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   // Advance the link info for the next element on the chain
+   chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
+
+   VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance);
+   if (result != VK_SUCCESS) return result;
+
+   struct instance_data *instance_data = new_instance_data(*pInstance);
+   vk_load_instance_commands(instance_data->instance,
+                             fpGetInstanceProcAddr,
+                             &instance_data->vtable);
+   instance_data_map_physical_devices(instance_data, true);
+
+   parse_overlay_env(&instance_data->params, getenv("MANGOHUD_CONFIG"));
+
+   int font_size = instance_data->params.font_size > 0 ? instance_data->params.font_size : 24;
+
+   hudSpacing = font_size / 2;
+   hudFirstRow = font_size * 5;
+   hudSecondRow = font_size * 8;
+
+   /* If there's no control file and an output_file was specified, start
+    * capturing fps data right away.
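+    * Otherwise capture stays off until toggled at runtime (the F2
+    * keybinding described in the README).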
+    */
+   instance_data->capture_enabled =
+      instance_data->params.output_file && instance_data->params.control < 0;
+   instance_data->capture_started = instance_data->capture_enabled;
+
+   for (int i = OVERLAY_PARAM_ENABLED_vertices;
+        i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
+      if (instance_data->params.enabled[i]) {
+         instance_data->pipeline_statistics_enabled = true;
+         break;
+      }
+   }
+
+   return result;
+}
+
+static void overlay_DestroyInstance(
+    VkInstance                                  instance,
+    const VkAllocationCallbacks*                pAllocator)
+{
+   struct instance_data *instance_data = FIND(struct instance_data, instance);
+   instance_data_map_physical_devices(instance_data, false);
+   instance_data->vtable.DestroyInstance(instance, pAllocator);
+   destroy_instance_data(instance_data);
+}
+
+static const struct {
+   const char *name;
+   void *ptr;
+} name_to_funcptr_map[] = {
+   { "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr },
+#define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn }
+#define ADD_ALIAS_HOOK(alias, fn) { "vk" # alias, (void *) overlay_ ## fn }
+   ADD_HOOK(AllocateCommandBuffers),
+   ADD_HOOK(FreeCommandBuffers),
+   ADD_HOOK(ResetCommandBuffer),
+   ADD_HOOK(BeginCommandBuffer),
+   ADD_HOOK(EndCommandBuffer),
+   ADD_HOOK(CmdExecuteCommands),
+
+   ADD_HOOK(CmdDraw),
+   ADD_HOOK(CmdDrawIndexed),
+   ADD_HOOK(CmdDrawIndirect),
+   ADD_HOOK(CmdDrawIndexedIndirect),
+   ADD_HOOK(CmdDispatch),
+   ADD_HOOK(CmdDispatchIndirect),
+   ADD_HOOK(CmdDrawIndirectCount),
+   ADD_ALIAS_HOOK(CmdDrawIndirectCountKHR, CmdDrawIndirectCount),
+   ADD_HOOK(CmdDrawIndexedIndirectCount),
+   ADD_ALIAS_HOOK(CmdDrawIndexedIndirectCountKHR, CmdDrawIndexedIndirectCount),
+
+   ADD_HOOK(CmdBindPipeline),
+
+   ADD_HOOK(CreateSwapchainKHR),
+   ADD_HOOK(QueuePresentKHR),
+   ADD_HOOK(DestroySwapchainKHR),
+   ADD_HOOK(AcquireNextImageKHR),
+   ADD_HOOK(AcquireNextImage2KHR),
+
+   ADD_HOOK(QueueSubmit),
+
+   ADD_HOOK(CreateDevice),
+   ADD_HOOK(DestroyDevice),
+
+   ADD_HOOK(CreateInstance),
+   ADD_HOOK(DestroyInstance),
+#undef ADD_HOOK
+#undef ADD_ALIAS_HOOK
+};
+
+static void *find_ptr(const char *name)
+{
+   for (uint32_t i = 0; i < ARRAY_SIZE(name_to_funcptr_map); i++) {
+      if (strcmp(name, name_to_funcptr_map[i].name) == 0)
+         return name_to_funcptr_map[i].ptr;
+   }
+
+   return NULL;
+}
+
+VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkDevice dev,
+                                                                             const char *funcName)
+{
+   void *ptr = find_ptr(funcName);
+   if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr);
+
+   if (dev == NULL) return NULL;
+
+   struct device_data *device_data = FIND(struct device_data, dev);
+   if (device_data->vtable.GetDeviceProcAddr == NULL) return NULL;
+   return device_data->vtable.GetDeviceProcAddr(dev, funcName);
+}
+
+VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance,
+                                                                               const char *funcName)
+{
+   void *ptr = find_ptr(funcName);
+   if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr);
+
+   if (instance == NULL) return NULL;
+
+   struct instance_data *instance_data = FIND(struct instance_data, instance);
+   if (instance_data->vtable.GetInstanceProcAddr == NULL) return NULL;
+   return instance_data->vtable.GetInstanceProcAddr(instance, funcName);
+}
diff --git a/src/overlay.frag b/src/overlay.frag
new file mode 100644
index 00000000..313a8880
--- /dev/null
+++ b/src/overlay.frag
@@ -0,0 +1,14 @@
+#version 450 core
+layout(location = 0) out vec4 fColor;
+
+layout(set=0, binding=0) uniform sampler2D sTexture;
+
+layout(location = 0) in struct{
+    vec4 Color;
+    vec2 UV;
+} In;
+
+void main()
+{
+    fColor = In.Color * texture(sTexture, In.UV.st);
+}
diff --git
a/src/overlay.vert b/src/overlay.vert new file mode 100644 index 00000000..20b29082 --- /dev/null +++ b/src/overlay.vert @@ -0,0 +1,25 @@ +#version 450 core +layout(location = 0) in vec2 aPos; +layout(location = 1) in vec2 aUV; +layout(location = 2) in vec4 aColor; + +layout(push_constant) uniform uPushConstant{ + vec2 uScale; + vec2 uTranslate; +} pc; + +out gl_PerVertex{ + vec4 gl_Position; +}; + +layout(location = 0) out struct{ + vec4 Color; + vec2 UV; +} Out; + +void main() +{ + Out.Color = aColor; + Out.UV = aUV; + gl_Position = vec4(aPos*pc.uScale+pc.uTranslate, 0, 1); +} diff --git a/src/overlay_params.c b/src/overlay_params.c new file mode 100644 index 00000000..95715691 --- /dev/null +++ b/src/overlay_params.c @@ -0,0 +1,223 @@ +/* + * Copyright © 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sysinfo.h>
+
+#include "overlay_params.h"
+
+#include "mesa/util/os_socket.h"
+
+static enum overlay_param_position
+parse_position(const char *str)
+{
+   if (!str || !strcmp(str, "top-left"))
+      return LAYER_POSITION_TOP_LEFT;
+   if (!strcmp(str, "top-right"))
+      return LAYER_POSITION_TOP_RIGHT;
+   if (!strcmp(str, "bottom-left"))
+      return LAYER_POSITION_BOTTOM_LEFT;
+   if (!strcmp(str, "bottom-right"))
+      return LAYER_POSITION_BOTTOM_RIGHT;
+   return LAYER_POSITION_TOP_LEFT;
+}
+
+static FILE *
+parse_output_file(const char *str)
+{
+   return fopen(str, "w+");
+}
+
+static int
+parse_control(const char *str)
+{
+   int ret = os_socket_listen_abstract(str, 1);
+   if (ret < 0) {
+      fprintf(stderr, "ERROR: Couldn't create socket pipe at '%s'\n", str);
+      fprintf(stderr, "ERROR: '%s'\n", strerror(errno));
+      return ret;
+   }
+
+   os_socket_block(ret, false);
+
+   return ret;
+}
+
+static float
+parse_font_size(const char *str)
+{
+   return strtof(str, NULL);
+}
+
+static uint32_t
+parse_fps_sampling_period(const char *str)
+{
+   return strtol(str, NULL, 0) * 1000;
+}
+
+static bool
+parse_no_display(const char *str)
+{
+   return strtol(str, NULL, 0) != 0;
+}
+
+static unsigned
+parse_unsigned(const char *str)
+{
+   return strtol(str, NULL, 0);
+}
+
+#define parse_width(s)  parse_unsigned(s)
+#define parse_height(s) parse_unsigned(s)
+
+static bool
+parse_help(const char *str)
+{
+   fprintf(stderr, "Layer params using MANGOHUD_CONFIG=\n");
+#define OVERLAY_PARAM_BOOL(name) \
+   fprintf(stderr, "\t%s=0|1\n", #name);
+#define OVERLAY_PARAM_CUSTOM(name)
+   OVERLAY_PARAMS
+#undef OVERLAY_PARAM_BOOL
+#undef OVERLAY_PARAM_CUSTOM
+   fprintf(stderr, "\tposition=top-left|top-right|bottom-left|bottom-right\n");
+   fprintf(stderr, "\tfps_sampling_period=number-of-milliseconds\n");
+   fprintf(stderr, "\tno_display=0|1\n");
+   fprintf(stderr, "\toutput_file=/path/to/output.txt\n");
+   fprintf(stderr, "\twidth=width-in-pixels\n");
+   fprintf(stderr, "\theight=height-in-pixels\n");
+
+   return true;
+}
+
+static bool is_delimiter(char c)
+{
+   return c == 0 || c == ',' || c == ':' || c == ';' || c == '=';
+}
+
+static int
+parse_string(const char *s, char *out_param, char *out_value)
+{
+   int i = 0;
+
+   for (; !is_delimiter(*s); s++, out_param++, i++)
+      *out_param = *s;
+
+   *out_param = 0;
+
+   if (*s == '=') {
+      s++;
+      i++;
+      for (; !is_delimiter(*s); s++, out_value++, i++)
+         *out_value = *s;
+   } else
+      *(out_value++) = '1';
+   *out_value = 0;
+
+   if (*s && is_delimiter(*s)) {
+      s++;
+      i++;
+   }
+
+   if (*s && !i) {
+      fprintf(stderr, "mesa-overlay: syntax error: unexpected '%c' (%i) while "
+              "parsing a string\n", *s, *s);
+      fflush(stderr);
+   }
+
+   return i;
+}
+
+const char *overlay_param_names[] = {
+#define OVERLAY_PARAM_BOOL(name) #name,
+#define OVERLAY_PARAM_CUSTOM(name)
+   OVERLAY_PARAMS
+#undef OVERLAY_PARAM_BOOL
+#undef OVERLAY_PARAM_CUSTOM
+};
+
+void
+parse_overlay_env(struct overlay_params *params,
+                  const char *env)
+{
+   uint32_t num;
+   char key[256], value[256];
+
+   memset(params, 0, sizeof(*params));
+
+   /* Visible by default */
+   params->enabled[OVERLAY_PARAM_ENABLED_fps] = true;
+   params->enabled[OVERLAY_PARAM_ENABLED_frame_timing] = true;
+   params->enabled[OVERLAY_PARAM_ENABLED_core_load] = false;
+   params->enabled[OVERLAY_PARAM_ENABLED_cpu_temp] = false;
+   params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp] = false;
+   params->fps_sampling_period = 500000; /* 500ms */
+   params->width = 280;
+   params->height = 140;
+   params->control = -1;
+
+   if (!env)
+      return;
+
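+   /* Consume "key" or "key=value" tokens one at a time, e.g.
+    * MANGOHUD_CONFIG=cpu_temp,gpu_temp,position=top-right
+    * parse_string() treats a bare key as key=1.
+    */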
+   while ((num = parse_string(env, key, value)) != 0) {
+      env += num;
+
+#define OVERLAY_PARAM_BOOL(name)                                \
+      if (!strcmp(#name, key)) {                                \
+         params->enabled[OVERLAY_PARAM_ENABLED_##name] =        \
+            strtol(value, NULL, 0);                             \
+         continue;                                              \
+      }
+#define OVERLAY_PARAM_CUSTOM(name)              \
+      if (!strcmp(#name, key)) {                \
+         params->name = parse_##name(value);    \
+         continue;                              \
+      }
+      OVERLAY_PARAMS
+#undef OVERLAY_PARAM_BOOL
+#undef OVERLAY_PARAM_CUSTOM
+      fprintf(stderr, "Unknown option '%s'\n", key);
+   }
+
+   /* If font_size is set and height has not been changed from its default,
+    * grow the hud height as needed to fit the font.
+    */
+   bool heightChanged = params->height != 140;
+
+   int FrameTimeGraphHeight = 50;
+
+   if (!params->font_size)
+      params->font_size = 24.0f;
+
+   if (!heightChanged)
+      params->height = (params->font_size + 3 * 2) * 3 + FrameTimeGraphHeight;
+
+   // Add more hud height if per-core load reporting is enabled
+   if (params->enabled[OVERLAY_PARAM_ENABLED_core_load] && !heightChanged)
+      params->height += ((params->font_size - 3) * get_nprocs());
+}
diff --git a/src/overlay_params.h b/src/overlay_params.h
new file mode 100644
index 00000000..12a152c8
--- /dev/null
+++ b/src/overlay_params.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef OVERLAY_PARAMS_H
+#define OVERLAY_PARAMS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OVERLAY_PARAMS                              \
+   OVERLAY_PARAM_BOOL(fps)                          \
+   OVERLAY_PARAM_BOOL(frame)                        \
+   OVERLAY_PARAM_BOOL(frame_timing)                 \
+   OVERLAY_PARAM_BOOL(submit)                       \
+   OVERLAY_PARAM_BOOL(draw)                         \
+   OVERLAY_PARAM_BOOL(draw_indexed)                 \
+   OVERLAY_PARAM_BOOL(draw_indirect)                \
+   OVERLAY_PARAM_BOOL(draw_indexed_indirect)        \
+   OVERLAY_PARAM_BOOL(draw_indirect_count)          \
+   OVERLAY_PARAM_BOOL(draw_indexed_indirect_count)  \
+   OVERLAY_PARAM_BOOL(dispatch)                     \
+   OVERLAY_PARAM_BOOL(dispatch_indirect)            \
+   OVERLAY_PARAM_BOOL(pipeline_graphics)            \
+   OVERLAY_PARAM_BOOL(pipeline_compute)             \
+   OVERLAY_PARAM_BOOL(pipeline_raytracing)          \
+   OVERLAY_PARAM_BOOL(acquire)                      \
+   OVERLAY_PARAM_BOOL(acquire_timing)               \
+   OVERLAY_PARAM_BOOL(present_timing)               \
+   OVERLAY_PARAM_BOOL(vertices)                     \
+   OVERLAY_PARAM_BOOL(primitives)                   \
+   OVERLAY_PARAM_BOOL(vert_invocations)             \
+   OVERLAY_PARAM_BOOL(geom_invocations)             \
+   OVERLAY_PARAM_BOOL(geom_primitives)              \
+   OVERLAY_PARAM_BOOL(clip_invocations)             \
+   OVERLAY_PARAM_BOOL(clip_primitives)              \
+   OVERLAY_PARAM_BOOL(frag_invocations)             \
+   OVERLAY_PARAM_BOOL(tess_ctrl_patches)            \
+   OVERLAY_PARAM_BOOL(tess_eval_invocations)        \
+   OVERLAY_PARAM_BOOL(compute_invocations)          \
+   OVERLAY_PARAM_BOOL(gpu_timing)                   \
+   OVERLAY_PARAM_BOOL(core_load)                    \
+   OVERLAY_PARAM_BOOL(cpu_temp)                     \
+   OVERLAY_PARAM_BOOL(gpu_temp)                     \
+   OVERLAY_PARAM_CUSTOM(fps_sampling_period)        \
+   OVERLAY_PARAM_CUSTOM(output_file)                \
+   OVERLAY_PARAM_CUSTOM(position)                   \
+   OVERLAY_PARAM_CUSTOM(width)                      \
+   OVERLAY_PARAM_CUSTOM(height)                     \
+   OVERLAY_PARAM_CUSTOM(no_display)                 \
+   OVERLAY_PARAM_CUSTOM(control)                    \
+   OVERLAY_PARAM_CUSTOM(font_size)                  \
+   OVERLAY_PARAM_CUSTOM(help)
+
+enum overlay_param_position {
+   LAYER_POSITION_TOP_LEFT,
+   LAYER_POSITION_TOP_RIGHT,
+   LAYER_POSITION_BOTTOM_LEFT,
+   LAYER_POSITION_BOTTOM_RIGHT,
+};
+
+enum overlay_param_enabled {
+#define OVERLAY_PARAM_BOOL(name) OVERLAY_PARAM_ENABLED_##name,
+#define OVERLAY_PARAM_CUSTOM(name)
+   OVERLAY_PARAMS
+#undef OVERLAY_PARAM_BOOL
+#undef OVERLAY_PARAM_CUSTOM
+   OVERLAY_PARAM_ENABLED_MAX
+};
+
+struct overlay_params {
+   bool enabled[OVERLAY_PARAM_ENABLED_MAX];
+   enum overlay_param_position position;
+   FILE *output_file;
+   int control;
+   uint32_t fps_sampling_period; /* us */
+   bool help;
+   bool no_display;
+   unsigned width;
+   unsigned height;
+   float font_size;
+};
+
+extern const char *overlay_param_names[];
+
+void parse_overlay_env(struct overlay_params *params,
+                       const char *env);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OVERLAY_PARAMS_H */
diff --git a/src/setup_mangohud.sh b/src/setup_mangohud.sh
new file mode 100755
index 00000000..9faaa17f
--- /dev/null
+++ b/src/setup_mangohud.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+MANGOHUD_DIR=$HOME/.local/share/MangoHud/
+LIB64=$HOME/.local/share/MangoHud/libMangoHud64.so
+LIB32=$HOME/.local/share/MangoHud/libMangoHud32.so
+IMPLICIT_LAYER_DIR=$HOME/.local/share/vulkan/implicit_layer.d
+EXPLICIT_LAYER_DIR=$HOME/.local/share/vulkan/explicit_layer.d
+
+install() {
+    mkdir -p "$IMPLICIT_LAYER_DIR"
+    mkdir -p "$EXPLICIT_LAYER_DIR"
+    mkdir -p "$MANGOHUD_DIR"
+    cp -v x32/libMangoHud32.so "$MANGOHUD_DIR"
+    cp -v x64/libMangoHud64.so "$MANGOHUD_DIR"
+    cp -v implicit_layer.d/mangohud32.json "$IMPLICIT_LAYER_DIR"
+    cp -v implicit_layer.d/mangohud64.json "$IMPLICIT_LAYER_DIR"
+    cp -v explicit_layer.d/mangohud32.json "$EXPLICIT_LAYER_DIR"
+    cp -v explicit_layer.d/mangohud64.json "$EXPLICIT_LAYER_DIR"
+    sed -i "s|libMangoHud.so|$LIB32|g" "$IMPLICIT_LAYER_DIR/mangohud32.json"
+    sed -i "s|libMangoHud.so|$LIB64|g" "$IMPLICIT_LAYER_DIR/mangohud64.json"
+    sed -i "s|64bit|32bit|g" "$IMPLICIT_LAYER_DIR/mangohud32.json"
+    sed -i "s|libMangoHud.so|$LIB32|g" "$EXPLICIT_LAYER_DIR/mangohud32.json"
+    sed -i "s|libMangoHud.so|$LIB64|g" "$EXPLICIT_LAYER_DIR/mangohud64.json"
+    sed -i "s|64bit|32bit|g" "$EXPLICIT_LAYER_DIR/mangohud32.json"
+    sed -i "s|mangohud|mangohud32|g" "$EXPLICIT_LAYER_DIR/mangohud32.json"
+}
+
+uninstall() {
+    rm -v "$MANGOHUD_DIR/libMangoHud32.so"
+    rm -v "$MANGOHUD_DIR/libMangoHud64.so"
+    rm -v "$IMPLICIT_LAYER_DIR/mangohud32.json"
+    rm -v "$IMPLICIT_LAYER_DIR/mangohud64.json"
+    rm -v "$EXPLICIT_LAYER_DIR/mangohud32.json"
+    rm -v "$EXPLICIT_LAYER_DIR/mangohud64.json"
+}
+
+case "$1" in
+    "install")
+        install
+        ;;
+    "uninstall")
+        uninstall
+        ;;
+    *)
+        echo "Unrecognized action: $1"
+        echo "Usage: $0 [install|uninstall]"
+        exit 1
+        ;;
+esac
\ No newline at end of file