Assuming that you can build, or have already built, libclang with the Python bindings, below is a script that loops through all the #include statements in a single C/C++ file, printing out the files they reference.
#!/usr/bin/env python
"""
Usage: call with <filename>

Prints the name of every file that <filename> pulls in through an active
#include directive, one per line.
"""

import sys

from clang.cindex import CursorKind, TranslationUnit


def list_includes(translation_unit):
    """
    Find all includes within the given TranslationUnit

    Only #include directives located in the translation unit's own source
    file are reported; directives found in the headers it includes are
    skipped.

    Returns a list holding the display name of each matching inclusion
    directive, in the order the cursors are visited.
    """
    cursor = translation_unit.cursor
    source_name = cursor.displayname

    includes = []

    for child in cursor.get_children():
        # We're only interested in preprocessor #include directives
        #
        if child.kind != CursorKind.INCLUSION_DIRECTIVE:
            continue

        # We don't want Cursors from files other than the one belonging to
        # translation_unit otherwise we get #includes for every file found
        # when clang parsed the input file.
        #
        included_from = child.location.file
        if included_from is not None and included_from.name == source_name:
            includes.append(child.displayname)

    return includes


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <filename>" % sys.argv[0])

    # The name of the file in which to look for #include statements
    #
    source_file = sys.argv[1]

    # This can be a list of compiler flags, [ '-Iinclude_path', '-DDEBUG', ]
    #
    parse_arguments = None

    # libclang only exposes preprocessor entities such as #include
    # directives as cursors when a detailed preprocessing record is
    # requested. Skipping function bodies is slightly quicker to parse as we
    # are not interested in the contents of functions.
    #
    parse_flags = (
        TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
        | TranslationUnit.PARSE_SKIP_FUNCTION_BODIES
    )

    source_translation_unit = TranslationUnit.from_source(
        source_file, parse_arguments, None, parse_flags, None)

    source_includes = list_includes(source_translation_unit)

    for include in source_includes:
        print(include)
The main advantage of this method over using regular expressions to find #includes is that the file is parsed by a fully functional C/C++ parser. If any #include is commented out, or wrapped by a preprocessor condition that evaluates to false, then it will not be in the returned list of includes.
// Example input: <windows.h> is only included when WIN32 is defined.
#include <map>

#if defined(WIN32)
# include <windows.h>
#endif
The header windows.h in the code above will not be returned by list_includes unless the WIN32 define is added to parse_arguments.
# Define WIN32 for the parse so the conditional #include becomes active.
parse_arguments = ["-DWIN32"]