Diffstat (limited to 'meta/recipes-devtools/python/python3/create_manifest3.py')
-rw-r--r--  meta/recipes-devtools/python/python3/create_manifest3.py  |  444
1 file changed, 444 insertions(+), 0 deletions(-)
diff --git a/meta/recipes-devtools/python/python3/create_manifest3.py b/meta/recipes-devtools/python/python3/create_manifest3.py
new file mode 100644
index 0000000000..045240ea0b
--- /dev/null
+++ b/meta/recipes-devtools/python/python3/create_manifest3.py
@@ -0,0 +1,444 @@
+# This script is used as a bitbake task to create a new python manifest
+# $ bitbake python -c create_manifest
+#
+# Our goal is to keep python-core as small as possible and add other python
+# packages only when the user needs them, which is why we split upstream python
+# into several packages.
+#
+# Put very simply, what this does is:
+# Launch python and see specifically what is required for it to run at a minimum.
+#
+# Go through the python-manifest file and launch a separate task for every single
+# one of the files in each package; this task will check what was required for that
+# specific module to run. These required modules will be called dependencies.
+# The output of such a task will be the list of modules or dependencies that were
+# found for that file.
+#
+# That output will be parsed by this script: we will look for each dependency in the
+# manifest, and if we find that another package already includes it, then we will add
+# that package as an RDEPENDS to the package we are currently checking; in case we don't
+# find the current dependency in any other package we will add it to the current package
+# as part of FILES.
+#
+#
+# This way we will create a new manifest from the data structure that was built during
+# this process; in this new manifest each package will contain specifically only
+# what it needs to run.
+#
+# There are some caveats which we try to deal with, such as repeated files in different
+# packages, packages that include folders, wildcards, and special packages.
+# It is also important to note that this method only works for python files and shared
+# libraries. Static libraries, header files and binaries need to be dealt with manually.
+#
+# This script differs from its python2 version mostly in how shared libraries are handled.
+# The manifest file for python3 has an extra field which contains the cached files for
+# each package.
+# The method used to handle cached files does not work when a module includes a folder which
+# itself contains the pycache folder; thankfully this is almost never the case.
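+#
+# For reference, each package entry in the manifest roughly follows the shape below
+# (an illustrative sketch only; the file names and summary text are made up):
+#
+#   "core": {
+#       "summary": "Python interpreter and core modules",
+#       "rdepends": [],
+#       "files": ["${libdir}/python${PYTHON_MAJMIN}/os.py"],
+#       "cached": ["${libdir}/python${PYTHON_MAJMIN}/__pycache__/os.*.pyc"]
+#   }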
+#
+# Author: Alejandro Enedino Hernandez Samaniego <alejandro at enedino dot org>
+
+
+import sys
+import subprocess
+import json
+import os
+import collections
+
+if '-d' in sys.argv:
+ debugFlag = '-d'
+else:
+ debugFlag = ''
+
+# Get python version from ${PYTHON_MAJMIN}
+pyversion = str(sys.argv[1])
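+# For illustration only: the create_manifest task is expected to invoke this script
+# roughly as "python3 create_manifest3.py 3.8 [-d]", where the version argument and
+# the optional -d debug flag map to the parsing above (the version value is just an example).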
+
+# Hack to get native python search path (for folders), not fond of it but it works for now
+pivot = 'recipe-sysroot-native'
+for p in sys.path:
+ if pivot in p:
+ nativelibfolder = p[:p.find(pivot)+len(pivot)]
+
+# Empty dict to hold the whole manifest
+new_manifest = collections.OrderedDict()
+
+# Check for repeated files, folders and wildcards
+allfiles = []
+repeated = []
+wildcards = []
+
+hasfolders = []
+allfolders = []
+
+def isFolder(value):
+ value = value.replace('${PYTHON_MAJMIN}',pyversion)
+ if os.path.isdir(value.replace('${libdir}',nativelibfolder+'/usr/lib')) or os.path.isdir(value.replace('${libdir}',nativelibfolder+'/usr/lib64')) or os.path.isdir(value.replace('${libdir}',nativelibfolder+'/usr/lib32')):
+ return True
+ else:
+ return False
+
+def isCached(item):
+ if '__pycache__' in item:
+ return True
+ else:
+ return False
+
+def prepend_comments(comments, json_manifest):
+ with open(json_manifest, 'r+') as manifest:
+ json_contents = manifest.read()
+ manifest.seek(0, 0)
+ manifest.write(comments + json_contents)
+
+def print_indent(msg, offset):
+ for l in msg.splitlines():
+ msg = ' ' * offset + l
+ print(msg)
+
+
+# Read existing JSON manifest
+with open('python3-manifest.json') as manifest:
+ # The JSON format doesn't allow comments, so we hack the read to keep the comments by using a marker
+ manifest_str = manifest.read()
+ json_start = manifest_str.find('# EOC') + 6 # EOC + \n
+ manifest.seek(0)
+ comments = manifest.read(json_start)
+ manifest_str = manifest.read()
+ old_manifest = json.loads(manifest_str, object_pairs_hook=collections.OrderedDict)
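+
+# To illustrate the marker hack above (a sketch of the expected layout, not the actual
+# contents): the manifest starts with comment lines, the last of which is the literal
+# line '# EOC', and everything after that marker is plain JSON, e.g.:
+#
+#   # Comments describing the manifest...
+#   # EOC
+#   {
+#       "core": { ... }
+#   }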
+
+#
+# First pass to get core-package functionality, because we base everything on the fact that core is actually working.
+# This pass is not exactly the same as the main loop below, so it should not be a function.
+#
+
+print_indent('Getting dependencies for package: core', 0)
+
+
+# This special call gets the core dependencies and
+# appends them to the old manifest, so it doesn't hurt what it
+# currently holds.
+# This way, when other packages check for dependencies
+# on the new core package, they will still find them
+# even when checking the old_manifest.
+
+output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package', '%s' % debugFlag]).decode('utf8')
+for coredep in output.split():
+ coredep = coredep.replace(pyversion,'${PYTHON_MAJMIN}')
+ if isCached(coredep):
+ if coredep not in old_manifest['core']['cached']:
+ old_manifest['core']['cached'].append(coredep)
+ else:
+ if coredep not in old_manifest['core']['files']:
+ old_manifest['core']['files'].append(coredep)
+
+
+# The second step is to loop through the existing files contained in the core package
+# according to the old manifest, and identify whether they are modules or some other type
+# of file that we can't import (directories, binaries, configs), in which case we
+# can only assume they were added correctly (manually), so we ignore those and
+# pass them to the manifest directly.
+
+for filedep in old_manifest['core']['files']:
+ if isFolder(filedep):
+ if isCached(filedep):
+ if filedep not in old_manifest['core']['cached']:
+ old_manifest['core']['cached'].append(filedep)
+ else:
+ if filedep not in old_manifest['core']['files']:
+ old_manifest['core']['files'].append(filedep)
+ continue
+ if '${bindir}' in filedep:
+ if filedep not in old_manifest['core']['files']:
+ old_manifest['core']['files'].append(filedep)
+ continue
+ if filedep == '':
+ continue
+ if '${includedir}' in filedep:
+ if filedep not in old_manifest['core']['files']:
+ old_manifest['core']['files'].append(filedep)
+ continue
+
+ # Get the actual module name; it shouldn't be affected by libdir/bindir, etc.
+ pymodule = os.path.splitext(os.path.basename(os.path.normpath(filedep)))[0]
+
+ # We now know that we're dealing with a python module, so we can import it
+ # and check what its dependencies are.
+ # We launch a separate task for each module for deterministic behavior.
+ # Each module will only import what it specifically needs to work.
+ # The output of each task will contain that module's dependencies.
+
+ print_indent('Getting dependencies for module: %s' % pymodule, 2)
+ output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule, '%s' % debugFlag]).decode('utf8')
+ print_indent('The following dependencies were found for module %s:\n' % pymodule, 4)
+ print_indent(output, 6)
+
+
+ for pymodule_dep in output.split():
+ pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')
+
+ if isCached(pymodule_dep):
+ if pymodule_dep not in old_manifest['core']['cached']:
+ old_manifest['core']['cached'].append(pymodule_dep)
+ else:
+ if pymodule_dep not in old_manifest['core']['files']:
+ old_manifest['core']['files'].append(pymodule_dep)
+
+
+# At this point we are done with the core package.
+# The old_manifest dictionary is updated only for the core package because
+# all others will use it as a base.
+
+
+print('\n\nChecking for directories...\n')
+# To improve the script speed, we check which packages contain directories
+# since we will be looping through (only) those later.
+for pypkg in old_manifest:
+ for filedep in old_manifest[pypkg]['files']:
+ if isFolder(filedep):
+ print_indent('%s is a directory' % filedep, 2)
+ if pypkg not in hasfolders:
+ hasfolders.append(pypkg)
+ if filedep not in allfolders:
+ allfolders.append(filedep)
+
+
+
+# This is the main loop that will handle each package.
+# It works in a similar fashion to the step before, but
+# we will now be updating a new dictionary that will eventually
+# become the new manifest.
+#
+# The following loops through all packages in the manifest and
+# through all files on each of them, and checks whether or not
+# they are modules that can be imported.
+# If they can be imported, then it checks the dependencies of
+# each of them by launching a separate task.
+# The output of that task is then parsed and the manifest is updated
+# accordingly: either the module is added to FILES for the current package,
+# or, if that module already belongs to another package, the current one
+# will RDEPEND on it.
+
+for pypkg in old_manifest:
+ # Use an empty dict as data structure to hold data for each package and fill it up
+ new_manifest[pypkg] = collections.OrderedDict()
+ new_manifest[pypkg]['summary'] = old_manifest[pypkg]['summary']
+ new_manifest[pypkg]['rdepends'] = []
+ new_manifest[pypkg]['files'] = []
+ new_manifest[pypkg]['cached'] = old_manifest[pypkg]['cached']
+
+ # All packages should depend on core
+ if pypkg != 'core':
+ new_manifest[pypkg]['rdepends'].append('core')
+ new_manifest[pypkg]['cached'] = []
+
+ print('\n')
+ print('--------------------------')
+ print('Handling package %s' % pypkg)
+ print('--------------------------')
+
+ # Handle special cases, we assume that when they were manually added
+ # to the manifest we knew what we were doing.
+ special_packages = ['misc', 'modules', 'dev', 'tests']
+ if pypkg in special_packages or 'staticdev' in pypkg:
+ print_indent('Passing %s package directly' % pypkg, 2)
+ new_manifest[pypkg] = old_manifest[pypkg]
+ continue
+
+ for filedep in old_manifest[pypkg]['files']:
+ # We already handled core on the first pass, we can ignore it now
+ if pypkg == 'core':
+ if filedep not in new_manifest[pypkg]['files']:
+ new_manifest[pypkg]['files'].append(filedep)
+ continue
+
+ # Handle/ignore what we can't import
+ if isFolder(filedep):
+ new_manifest[pypkg]['files'].append(filedep)
+ # Asyncio (and others) use the same name for the package and the folder; we should not skip those...
+ path,mod = os.path.split(filedep)
+ if mod != pypkg:
+ continue
+ if '${bindir}' in filedep:
+ if filedep not in new_manifest[pypkg]['files']:
+ new_manifest[pypkg]['files'].append(filedep)
+ continue
+ if filedep == '':
+ continue
+ if '${includedir}' in filedep:
+ if filedep not in new_manifest[pypkg]['files']:
+ new_manifest[pypkg]['files'].append(filedep)
+ continue
+
+ # Get the actual module name; it shouldn't be affected by libdir/bindir, etc.
+ # We need to check whether the imported module comes from another one (e.g. sqlite3.dump)
+ path, pymodule = os.path.split(filedep)
+ path = os.path.basename(path)
+ pymodule = os.path.splitext(os.path.basename(pymodule))[0]
+
+ # If this condition is met, it means we need to import it from another module
+ # or it's the folder itself (e.g. unittest)
+ if path == pypkg:
+ if pymodule:
+ pymodule = path + '.' + pymodule
+ else:
+ pymodule = path
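+ # For illustration only (made-up path): a filedep of '${libdir}/python${PYTHON_MAJMIN}/sqlite3/dump.py'
+ # yields path 'sqlite3' and pymodule 'dump', which is then imported as 'sqlite3.dump'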
+
+
+
+ # We now know that we're dealing with a python module, so we can import it
+ # and check what its dependencies are.
+ # We launch a separate task for each module for deterministic behavior.
+ # Each module will only import what it specifically needs to work.
+ # The output of each task will contain that module's dependencies.
+
+ print_indent('\nGetting dependencies for module: %s' % pymodule, 2)
+ output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule, '%s' % debugFlag]).decode('utf8')
+ print_indent('The following dependencies were found for module %s:\n' % pymodule, 4)
+ print_indent(output, 6)
+
+ reportFILES = []
+ reportRDEPS = []
+
+ for pymodule_dep in output.split():
+
+ # Warning: This first part is ugly.
+ # One of the dependencies that was found could be inside one of the folders included by another package.
+ # We need to check if this happens so we can add the package containing the folder as an rdependency.
+ # e.g. the encodings folder is contained in codecs.
+ # This would be solved if no packages included any folders.
+
+ # This can be done in two ways:
+ # 1 - We assume that if we take the filename out of the path we would get
+ # the folder string, then we check if that folder string is in the list of folders.
+ # This would not work if a package contains a folder which itself contains another folder,
+ # e.g. path/folder1/folder2/filename folder_string= path/folder1/folder2
+ # folder_string would not match any value contained in the list of folders.
+ #
+ # 2 - We do it the other way around, checking if the folder is contained in the path,
+ # e.g. path/folder1/folder2/filename folder_string= path/folder1/folder2
+ # is folder_string inside path/folder1/folder2/filename?
+ # Yes, it works, but we waste a couple of milliseconds.
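+ # For illustration only (made-up paths): if a dependency resolves to
+ # '${libdir}/python${PYTHON_MAJMIN}/encodings/utf_8.py' and the folder
+ # '${libdir}/python${PYTHON_MAJMIN}/encodings' is shipped by codecs, then
+ # codecs is added to this package's RDEPENDS instead of adding the file to FILES.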
+
+ pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')
+ inFolders = False
+ for folder in allfolders:
+ # The module could have a directory named after it, e.g. xml; if we take the filename out of the path
+ # we'll end up with ${libdir}, but we want ${libdir}/xml
+ if isFolder(pymodule_dep):
+ check_path = pymodule_dep
+ else:
+ check_path = os.path.dirname(pymodule_dep)
+ if folder in check_path :
+ inFolders = True # Did we find a folder?
+ folderFound = False # Second flag to break inner for
+ # Loop only through packages which contain folders
+ for pypkg_with_folder in hasfolders:
+ if (folderFound == False):
+ # print('Checking folder %s on package %s' % (pymodule_dep,pypkg_with_folder))
+ for folder_dep in old_manifest[pypkg_with_folder]['files'] + old_manifest[pypkg_with_folder]['cached']:
+ if folder_dep == folder:
+ print ('%s directory found in %s' % (folder, pypkg_with_folder))
+ folderFound = True
+ if pypkg_with_folder not in new_manifest[pypkg]['rdepends'] and pypkg_with_folder != pypkg:
+ new_manifest[pypkg]['rdepends'].append(pypkg_with_folder)
+ else:
+ break
+
+ # A folder was found so we're done with this item, we can go on
+ if inFolders:
+ continue
+
+
+
+ # No directories beyond this point
+ # We might already have this module in the dictionary, since it could depend on a (previously checked) module
+ if pymodule_dep not in new_manifest[pypkg]['files'] and pymodule_dep not in new_manifest[pypkg]['cached']:
+ # Handle core as a special package; we already did it, so we pass it to the NEW data structure directly
+ if pypkg == 'core':
+ print('Adding %s to %s FILES' % (pymodule_dep, pypkg))
+ if pymodule_dep.endswith('*'):
+ wildcards.append(pymodule_dep)
+ if isCached(pymodule_dep):
+ new_manifest[pypkg]['cached'].append(pymodule_dep)
+ else:
+ new_manifest[pypkg]['files'].append(pymodule_dep)
+
+ # Check for repeated files
+ if pymodule_dep not in allfiles:
+ allfiles.append(pymodule_dep)
+ else:
+ if pymodule_dep not in repeated:
+ repeated.append(pymodule_dep)
+ else:
+
+
+ # Last step: Figure out whether this belongs in FILES or RDEPENDS.
+ # We check if this module is already contained in another package; if it is, we add that
+ # package as an RDEPENDS, and if it's not, it means the module should be contained in the current
+ # package, so we add it to FILES
+ for possible_rdep in old_manifest:
+ # Debug
+ # print('Checking %s ' % pymodule_dep + ' in %s' % possible_rdep)
+ if pymodule_dep in old_manifest[possible_rdep]['files'] or pymodule_dep in old_manifest[possible_rdep]['cached']:
+ # Since we're nesting, we need to check it's not the same pypkg
+ if(possible_rdep != pypkg):
+ if possible_rdep not in new_manifest[pypkg]['rdepends']:
+ # Add it to the new manifest data struct as RDEPENDS since it contains something this module needs
+ reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (possible_rdep, pypkg, pymodule_dep))
+ new_manifest[pypkg]['rdepends'].append(possible_rdep)
+ break
+ else:
+
+ # Since this module wasn't found in another package, it is not an RDEP,
+ # so we add it to FILES for this package.
+ # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldn't be in sqlite3 FILES)
+ if os.path.basename(pymodule_dep) != pypkg:
+ reportFILES.append(('Adding %s to %s FILES\n' % (pymodule_dep, pypkg)))
+ if isCached(pymodule_dep):
+ new_manifest[pypkg]['cached'].append(pymodule_dep)
+ else:
+ new_manifest[pypkg]['files'].append(pymodule_dep)
+ if pymodule_dep.endswith('*'):
+ wildcards.append(pymodule_dep)
+ if pymodule_dep not in allfiles:
+ allfiles.append(pymodule_dep)
+ else:
+ if pymodule_dep not in repeated:
+ repeated.append(pymodule_dep)
+
+ print('\n')
+ print('#################################')
+ print('Summary for module %s' % pymodule)
+ print('FILES found for module %s:' % pymodule)
+ print(''.join(reportFILES))
+ print('RDEPENDS found for module %s:' % pymodule)
+ print(''.join(reportRDEPS))
+ print('#################################')
+
+print('The following FILES contain wildcards, please check if they are necessary')
+print(wildcards)
+print('The following packages contain folders, please check if they are necessary')
+print(hasfolders)
+
+
+# Sort it just so it looks nicer
+for pypkg in new_manifest:
+ new_manifest[pypkg]['files'].sort()
+ new_manifest[pypkg]['cached'].sort()
+ new_manifest[pypkg]['rdepends'].sort()
+
+# Create the manifest from the data structure that was built
+with open('python3-manifest.json.new','w') as outfile:
+ json.dump(new_manifest,outfile, indent=4)
+ outfile.write('\n')
+
+prepend_comments(comments,'python3-manifest.json.new')
+
+if (repeated):
+ error_msg = '\n\nERROR:\n'
+ error_msg += 'The following files were found in more than one package,\n'
+ error_msg += 'this is likely to happen when new files are introduced after an upgrade,\n'
+ error_msg += 'please check which package should get them, modify the manifest accordingly and re-run the create_manifest task:\n'
+ error_msg += '\n'.join(repeated)
+ error_msg += '\n'
+ sys.exit(error_msg)
+