aboutsummaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/python/python/create_manifest2.py
blob: 4c55bd7d7a04fd056445ac72802c66390bc89f28 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, which is why we split upstream python
# into several packages.
#
# In a very simplistic way what this does is: 
# Launch python and see specifically what is required for it to run at a minimum
#
# Go through the python-manifest file and launch a separate task for every single
# one of the files on each package, this task will check what was required for that
# specific module to run, these modules will be called dependencies.
# The output of such task will be a list of the modules or dependencies that were
# found for that file.
#
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency on any other package we will add it to the current package
# as part of FILES.
#
#
# This way we will create a new manifest from the data structure that was built during
# this process; in this new manifest each package will contain specifically only
# what it needs to run.
#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
#
# Author: Alejandro Enedino Hernandez Samaniego "aehs29" <alejandro.hernandez@intel.com>


import sys
import subprocess
import json
import os

# Hack to get the native python search path (needed to resolve folders): the
# native sysroot is the part of a sys.path entry up to and including the
# 'recipe-sysroot-native' component.  Not pretty, but it works for now.
pivot = 'recipe-sysroot-native'


def find_native_sysroot(search_path, marker):
    """Return the sysroot prefix found in search_path, or None.

    Every entry of search_path that contains marker yields the part of that
    entry up to and including the first occurrence of marker.  When several
    entries match, the prefix taken from the last one is returned; when no
    entry matches the result is None.
    """
    sysroot = None
    for entry in search_path:
        position = entry.find(marker)
        if position != -1:
            sysroot = entry[:position + len(marker)]
    return sysroot


nativelibfolder = find_native_sysroot(sys.path, pivot)
if nativelibfolder is None:
    # The name used to be left undefined in this case, which only surfaced
    # later as a NameError inside isFolder().  Always bind it and say what is
    # wrong up front; folder detection still cannot work without a sysroot.
    sys.stderr.write(
        "WARNING: no sys.path entry contains '%s'; the native sysroot could "
        "not be located, so folder entries of the manifest cannot be "
        "resolved.\n" % pivot)

# Empty dict to hold the whole manifest
new_manifest = {}

# Bookkeeping for repeated files, wildcards and folders
allfiles = []    # dependencies that were added to the FILES of some package
repeated = []    # dependencies that were added to FILES more than once
wildcards = []   # dependencies added to FILES whose name ends with '*'

hasfolders = []  # names of the packages that list at least one folder
allfolders = []  # the folder entries found across all packages

def isFolder(value):
    """Tell whether a manifest entry names a directory in the native sysroot.

    '${libdir}' in value is expanded, in turn, to the usr/lib, usr/lib64 and
    usr/lib32 directories below nativelibfolder; the entry counts as a folder
    as soon as one of the expanded paths is an existing directory.

    Returns True or False.
    """
    for libdir in ('/usr/lib', '/usr/lib64', '/usr/lib32'):
        candidate = value.replace('${libdir}', nativelibfolder + libdir)
        if os.path.isdir(candidate):
            return True
    return False

# Read existing JSON manifest
with open('python2-manifest.json') as manifest:
    old_manifest = json.load(manifest)


# First pass to get core-package functionality, because we base everything on the fact that core is actually working
# Not exactly the same so it should not be a function
print ("Getting dependencies for core package:")

# Same list object as the one stored in the manifest: everything appended here
# becomes part of old_manifest['core']['files'], which is the data structure
# used later on to check the dependencies of the other packages.
core_files = old_manifest['core']['files']

# Special call to check for core package.  The output is read in text mode so
# that the items compare as strings against the entries loaded from JSON.
output = subprocess.check_output([sys.executable, 'get_module_deps2.py', 'python-core-package'],
                                 universal_newlines=True)
for item in output.split():
    # We append it so it doesn't hurt what we currently have
    if item not in core_files:
        core_files.append(item)

# NOTE: core_files grows while this loop runs; entries appended by the loop
# body are visited by later iterations of the same loop as well.
for value in core_files:
    # Ignore empty values
    if value == '':
        continue
    # Folders, binaries and headers are not importable, so there is nothing to
    # query for them; they are already in the core list and simply stay there.
    # (Folders used to fall through and have their basename probed as if it
    # were a module.)
    if isFolder(value) or '${bindir}' in value or '${includedir}' in value:
        continue

    # Get module name, shouldn't be affected by libdir/bindir
    value = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

    # Launch separate task for each module for deterministic behavior
    # Each module will only import what is necessary for it to work in specific
    print ('Getting dependencies for module: %s' % value)
    output = subprocess.check_output([sys.executable, 'get_module_deps2.py', '%s' % value],
                                     universal_newlines=True)
    for item in output.split():
        # We append it so it doesn't hurt what we currently have
        if item not in core_files:
            core_files.append(item)

# Record every folder entry of the manifest together with the packages that
# list one; both lists keep first-seen order and hold no duplicates.
for key, package in old_manifest.items():
    for value in package['files']:
        if not isFolder(value):
            continue
        print ('%s is a folder' % value)
        if key not in hasfolders:
            hasfolders.append(key)
        if value not in allfolders:
            allfolders.append(value)

# Handle special cases, we assume that when they were manually added
# to the manifest we knew what we were doing.
special_packages = ['misc', 'modules', 'dev']

# Build the entry of every package in the new manifest.
for key in old_manifest:
    print ('Handling package %s' % key)

    if key in special_packages or 'staticdev' in key:
        print('Passing %s package directly' % key)
        new_manifest[key] = old_manifest[key]
        continue

    # Use a fresh dict to hold the data of each package and fill it up;
    # all packages except core itself should depend on core.
    new_manifest[key] = {
        'files': [],
        'rdepends': [] if key == 'core' else ['core'],
        'summary': old_manifest[key]['summary'],
    }
    files = new_manifest[key]['files']
    rdepends = new_manifest[key]['rdepends']

    # We already handled core on the first pass, its list is taken over as is
    if key == 'core':
        files.extend(old_manifest[key]['files'])
        continue

    for value in old_manifest[key]['files']:
        # Ignore empty values
        if value == '':
            continue
        # Folders (difficult to handle with multilib), binaries and headers
        # are not importable: pass them directly to the new manifest and go on
        # with the next entry.  (Folders used to fall through and have their
        # basename probed as if it were a module.)
        if isFolder(value) or '${bindir}' in value or '${includedir}' in value:
            if value not in files:
                files.append(value)
            continue

        # Get module name, shouldn't be affected by libdir/bindir
        value = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

        # Launch separate task for each module for deterministic behavior
        # Each module will only import what is necessary for it to work in specific
        print ('Getting dependencies for module: %s' % value)
        # Text mode, so that the items compare as strings against the manifest
        output = subprocess.check_output([sys.executable, 'get_module_deps2.py', '%s' % value],
                                         universal_newlines=True)

        # We can print dependencies for debugging purposes
        #print (output)
        # Every whitespace-separated token of the output is one dependency
        for item in output.split():

            # One of the dependencies that was found could be inside of one of
            # the folders included by another package; in that case the package
            # containing the folder becomes an RDEPENDS.
            # e.g. Folder encodings contained in codecs
            # This would be solved if no packages included any folders
            #
            # We check whether a known folder string is contained in the path
            # of the dependency (rather than stripping the file name from the
            # path and looking that up), because that also works when the
            # dependency lives in a sub-folder of the listed folder.
            inFolders = False
            for folder in allfolders:
                if folder not in item:
                    continue
                inFolders = True
                # Loop only through packages which contain folders and stop at
                # the first one that lists this folder.
                for keyfolder in hasfolders:
                    if folder in old_manifest[keyfolder]['files']:
                        print ('%s found in %s' % (folder, keyfolder))
                        if keyfolder != key and keyfolder not in rdepends:
                            rdepends.append(keyfolder)
                        break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # We might already have it since it could depend on a (previously checked) module
            if item in files:
                continue

            # Check if this dependency is already contained on another package, so we add
            # that package as an RDEPENDS, or if it's not, it means it should be contained
            # on the current package, so we add it to FILES.
            for newkey in old_manifest:
                # Since we are nesting, make sure it's not the package itself
                if newkey != key and item in old_manifest[newkey]['files']:
                    if newkey not in rdepends:
                        print('Adding %s to %s RDEPENDS, because it contains %s' % (newkey, key, item))
                        rdepends.append(newkey)
                    break
            else:
                # No other package provides it, so it's not an RDEP: add it to FILES
                print('Adding %s to %s FILES' % (item, key))
                files.append(item)
                if item.endswith('*'):
                    wildcards.append(item)
                # Check for repeated files
                if item not in allfiles:
                    allfiles.append(item)
                else:
                    repeated.append(item)

# Print the findings that need a manual review, each heading followed by its list.
# NOTE(review): the last list holds the names of the packages that include
# folders, although its heading talks about files.
for heading, findings in (
        ('The following files are repeated (contained in more than one package), please check which package should get it:', repeated),
        ('The following files contain wildcards, please check they are necessary', wildcards),
        ('The following files contain folders, please check they are necessary', hasfolders)):
    print(heading)
    print(findings)

# Sort the lists of every package in place, just so the result looks nice
for package in new_manifest.values():
    package['files'].sort()
    package['rdepends'].sort()

# Write the data structure that was built as the new manifest
with open('python2-manifest.json.new', 'w') as outfile:
    json.dump(new_manifest, outfile, indent=4, sort_keys=True)