From 78c79821ae7e2f060b9a74ea9aefce98102bb00e Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Wed, 1 Sep 2021 08:44:41 -0500 Subject: classes/create-spdx: Add class Adds a class as a first attempt to create SPDX SBoM documents during the build. This initial work was influenced by [meta-doubleopen][1], although almost completely rewritten. [1]: https://github.com/doubleopen-project/meta-doubleopen Signed-off-by: Joshua Watt Signed-off-by: Richard Purdie --- meta/classes/create-spdx.bbclass | 679 +++++++++++++++++++++++++++++++++++++++ meta/lib/oe/sbom.py | 63 ++++ meta/lib/oe/spdx.py | 263 +++++++++++++++ 3 files changed, 1005 insertions(+) create mode 100644 meta/classes/create-spdx.bbclass create mode 100644 meta/lib/oe/sbom.py create mode 100644 meta/lib/oe/spdx.py diff --git a/meta/classes/create-spdx.bbclass b/meta/classes/create-spdx.bbclass new file mode 100644 index 0000000000..35fb4421d0 --- /dev/null +++ b/meta/classes/create-spdx.bbclass @@ -0,0 +1,679 @@ +# +# SPDX-License-Identifier: GPL-2.0-only +# + +DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx/${MACHINE}" + +# The product name that the CVE database uses. Defaults to BPN, but may need to +# be overriden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff). +CVE_PRODUCT ??= "${BPN}" +CVE_VERSION ??= "${PV}" + +SPDXDIR ??= "${WORKDIR}/spdx" +SPDXDEPLOY = "${SPDXDIR}/deploy" +SPDXWORK = "${SPDXDIR}/work" + +SPDX_INCLUDE_SOURCES ??= "0" +SPDX_INCLUDE_PACKAGED ??= "0" +SPDX_ARCHIVE_SOURCES ??= "0" +SPDX_ARCHIVE_PACKAGED ??= "0" + +SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org" +SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdoc" + +do_image_complete[depends] = "virtual/kernel:do_create_spdx" + +def get_doc_namespace(d, doc): + import uuid + namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE")) + return "%s/%s-%s" % (d.getVar("SPDX_NAMESPACE_PREFIX"), doc.name, str(uuid.uuid5(namespace_uuid, doc.name))) + + +def is_work_shared(d): + pn = d.getVar('PN') + return bb.data.inherits_class('kernel', d) or pn.startswith('gcc-source') + + +def convert_license_to_spdx(lic, d): + def convert(l): + if l == "&": + return "AND" + + if l == "|": + return "OR" + + spdx = d.getVarFlag('SPDXLICENSEMAP', l) + if spdx is not None: + return spdx + + return l + + return ' '.join(convert(l) for l in lic.split()) + + +def process_sources(d): + pn = d.getVar('PN') + assume_provided = (d.getVar("ASSUME_PROVIDED") or "").split() + if pn in assume_provided: + for p in d.getVar("PROVIDES").split(): + if p != pn: + pn = p + break + + # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted, + # so avoid archiving source here. + if pn.startswith('glibc-locale'): + return False + if d.getVar('PN') == "libtool-cross": + return False + if d.getVar('PN') == "libgcc-initial": + return False + if d.getVar('PN') == "shadow-sysroot": + return False + + # We just archive gcc-source for all the gcc related recipes + if d.getVar('BPN') in ['gcc', 'libgcc']: + bb.debug(1, 'spdx: There is bug in scan of %s is, do nothing' % pn) + return False + + return True + + +def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=[], ignore_top_level_dirs=[]): + from pathlib import Path + import oe.spdx + import hashlib + + source_date_epoch = d.getVar("SOURCE_DATE_EPOCH") + + sha1s = [] + spdx_files = [] + + file_counter = 1 + for subdir, dirs, files in os.walk(topdir): + dirs[:] = [d for d in dirs if d not in ignore_dirs] + if subdir == str(topdir): + dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs] + + for file in files: + filepath = Path(subdir) / file + filename = str(filepath.relative_to(topdir)) + + if filepath.is_file() and not filepath.is_symlink(): + spdx_file = oe.spdx.SPDXFile() + spdx_file.SPDXID = get_spdxid(file_counter) + for t in get_types(filepath): + spdx_file.fileTypes.append(t) + spdx_file.fileName = filename + + hashes = { + "SHA1": hashlib.sha1(), + "SHA256": hashlib.sha256(), + } + + with filepath.open("rb") as f: + while True: + chunk = f.read(4096) + if not chunk: + break + + for h in hashes.values(): + h.update(chunk) + + if archive is not None: + f.seek(0) + info = archive.gettarinfo(fileobj=f) + info.name = filename + info.uid = 0 + info.gid = 0 + info.uname = "root" + info.gname = "root" + + if source_date_epoch is not None and info.mtime > int(source_date_epoch): + info.mtime = int(source_date_epoch) + + archive.addfile(info, f) + + for k, v in hashes.items(): + spdx_file.checksums.append(oe.spdx.SPDXChecksum( + algorithm=k, + checksumValue=v.hexdigest(), + )) + + sha1s.append(hashes["SHA1"].hexdigest()) + + doc.files.append(spdx_file) + doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file) + spdx_pkg.hasFiles.append(spdx_file.SPDXID) + + spdx_files.append(spdx_file) + + file_counter += 1 + + sha1s.sort() + verifier = hashlib.sha1() + for v in sha1s: + verifier.update(v.encode("utf-8")) + spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest() + + return spdx_files + + +def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources): + from pathlib import Path + import hashlib + import oe.packagedata + import oe.spdx + + debug_search_paths = [ + Path(d.getVar('PKGD')), + Path(d.getVar('STAGING_DIR_TARGET')), + Path(d.getVar('STAGING_DIR_NATIVE')), + ] + + pkg_data = oe.packagedata.read_subpkgdata_extended(package, d) + + if pkg_data is None: + return + + for file_path, file_data in pkg_data["files_info"].items(): + if not "debugsrc" in file_data: + continue + + for pkg_file in package_files: + if file_path.lstrip("/") == pkg_file.fileName.lstrip("/"): + break + else: + bb.fatal("No package file found for %s" % str(file_path)) + continue + + for debugsrc in file_data["debugsrc"]: + for search in debug_search_paths: + debugsrc_path = search / debugsrc.lstrip("/") + if not debugsrc_path.exists(): + continue + + with debugsrc_path.open("rb") as f: + sha = hashlib.sha256() + while True: + chunk = f.read(4096) + if not chunk: + break + sha.update(chunk) + + file_sha256 = sha.hexdigest() + + if not file_sha256 in sources: + bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(debugsrc_path), file_sha256)) + continue + + source_file = sources[file_sha256] + + doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace) + if doc_ref is None: + doc_ref = oe.spdx.SPDXExternalDocumentRef() + doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name + doc_ref.spdxDocument = source_file.doc.documentNamespace + doc_ref.checksum.algorithm = "SHA1" + doc_ref.checksum.checksumValue = source_file.doc_sha1 + package_doc.externalDocumentRefs.append(doc_ref) + + package_doc.add_relationship( + pkg_file, + "GENERATED_FROM", + "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID), + comment=debugsrc + ) + break + else: + bb.debug(1, "Debug source %s not found" % debugsrc) + + +def collect_dep_recipes(d, doc, spdx_recipe): + from pathlib import Path + import oe.sbom + import oe.spdx + + deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) + + dep_recipes = [] + taskdepdata = d.getVar("BB_TASKDEPDATA", False) + deps = sorted(set( + dep[0] for dep in taskdepdata.values() if + dep[1] == "do_create_spdx" and dep[0] != d.getVar("PN") + )) + for dep_pn in deps: + dep_recipe_path = deploy_dir_spdx / "recipes" / ("recipe-%s.spdx.json" % dep_pn) + + spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_recipe_path) + + for pkg in spdx_dep_doc.packages: + if pkg.name == dep_pn: + spdx_dep_recipe = pkg + break + else: + continue + + dep_recipes.append(oe.sbom.DepRecipe(spdx_dep_doc, spdx_dep_sha1, spdx_dep_recipe)) + + dep_recipe_ref = oe.spdx.SPDXExternalDocumentRef() + dep_recipe_ref.externalDocumentId = "DocumentRef-dependency-" + spdx_dep_doc.name + dep_recipe_ref.spdxDocument = spdx_dep_doc.documentNamespace + dep_recipe_ref.checksum.algorithm = "SHA1" + dep_recipe_ref.checksum.checksumValue = spdx_dep_sha1 + + doc.externalDocumentRefs.append(dep_recipe_ref) + + doc.add_relationship( + "%s:%s" % (dep_recipe_ref.externalDocumentId, spdx_dep_recipe.SPDXID), + "BUILD_DEPENDENCY_OF", + spdx_recipe + ) + + return dep_recipes + +collect_dep_recipes[vardepsexclude] += "BB_TASKDEPDATA" + + +def collect_dep_sources(d, dep_recipes): + import oe.sbom + + sources = {} + for dep in dep_recipes: + recipe_files = set(dep.recipe.hasFiles) + + for spdx_file in dep.doc.files: + if spdx_file.SPDXID not in recipe_files: + continue + + if "SOURCE" in spdx_file.fileTypes: + for checksum in spdx_file.checksums: + if checksum.algorithm == "SHA256": + sources[checksum.checksumValue] = oe.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file) + break + + return sources + + +python do_create_spdx() { + from datetime import datetime, timezone + import oe.sbom + import oe.spdx + import uuid + from pathlib import Path + from contextlib import contextmanager + import oe.cve_check + + @contextmanager + def optional_tarfile(name, guard, mode="w"): + import tarfile + import bb.compress.zstd + + num_threads = int(d.getVar("BB_NUMBER_THREADS")) + + if guard: + name.parent.mkdir(parents=True, exist_ok=True) + with bb.compress.zstd.open(name, mode=mode + "b", num_threads=num_threads) as f: + with tarfile.open(fileobj=f, mode=mode + "|") as tf: + yield tf + else: + yield None + + bb.build.exec_func("read_subpackage_metadata", d) + + deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) + spdx_workdir = Path(d.getVar("SPDXWORK")) + include_packaged = d.getVar("SPDX_INCLUDE_PACKAGED") == "1" + include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1" + archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES") == "1" + archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED") == "1" + + creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + doc = oe.spdx.SPDXDocument() + + doc.name = "recipe-" + d.getVar("PN") + doc.documentNamespace = get_doc_namespace(d, doc) + doc.creationInfo.created = creation_time + doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build." + doc.creationInfo.creators.append("Tool: meta-doubleopen") + doc.creationInfo.creators.append("Organization: Double Open Project ()") + doc.creationInfo.creators.append("Person: N/A ()") + + recipe = oe.spdx.SPDXPackage() + recipe.name = d.getVar("PN") + recipe.versionInfo = d.getVar("PV") + recipe.SPDXID = oe.sbom.get_recipe_spdxid(d) + + src_uri = d.getVar('SRC_URI') + if src_uri: + recipe.downloadLocation = src_uri.split()[0] + + homepage = d.getVar("HOMEPAGE") + if homepage: + recipe.homepage = homepage + + license = d.getVar("LICENSE") + if license: + recipe.licenseDeclared = convert_license_to_spdx(license, d) + + summary = d.getVar("SUMMARY") + if summary: + recipe.summary = summary + + description = d.getVar("DESCRIPTION") + if description: + recipe.description = description + + # Some CVEs may be patched during the build process without incrementing the version number, + # so querying for CVEs based on the CPE id can lead to false positives. To account for this, + # save the CVEs fixed by patches to source information field in the SPDX. + patched_cves = oe.cve_check.get_patched_cves(d) + patched_cves = list(patched_cves) + patched_cves = ' '.join(patched_cves) + if patched_cves: + recipe.sourceInfo = "CVEs fixed: " + patched_cves + + cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION")) + if cpe_ids: + for cpe_id in cpe_ids: + cpe = oe.spdx.SPDXExternalReference() + cpe.referenceCategory = "SECURITY" + cpe.referenceType = "http://spdx.org/rdf/references/cpe23Type" + cpe.referenceLocator = cpe_id + recipe.externalRefs.append(cpe) + + doc.packages.append(recipe) + doc.add_relationship(doc, "DESCRIBES", recipe) + + if process_sources(d) and include_sources: + recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.zst") + with optional_tarfile(recipe_archive, archive_sources) as archive: + spdx_get_src(d) + + add_package_files( + d, + doc, + recipe, + spdx_workdir, + lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), file_counter), + lambda filepath: ["SOURCE"], + ignore_dirs=[".git"], + ignore_top_level_dirs=["temp"], + archive=archive, + ) + + if archive is not None: + recipe.packageFileName = str(recipe_archive.name) + + dep_recipes = collect_dep_recipes(d, doc, recipe) + + doc_sha1 = oe.sbom.write_doc(d, doc, "recipes") + dep_recipes.append(oe.sbom.DepRecipe(doc, doc_sha1, recipe)) + + sources = collect_dep_sources(d, dep_recipes) + + pkgdest = Path(d.getVar("PKGDEST")) + for package in d.getVar("PACKAGES").split(): + if not oe.packagedata.packaged(package, d): + continue + + package_doc = oe.spdx.SPDXDocument() + pkg_name = d.getVar("PKG:%s" % package) or package + package_doc.name = pkg_name + package_doc.documentNamespace = get_doc_namespace(d, package_doc) + package_doc.creationInfo.created = creation_time + package_doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build." + package_doc.creationInfo.creators.append("Tool: meta-doubleopen") + package_doc.creationInfo.creators.append("Organization: Double Open Project ()") + package_doc.creationInfo.creators.append("Person: N/A ()") + + recipe_ref = oe.spdx.SPDXExternalDocumentRef() + recipe_ref.externalDocumentId = "DocumentRef-recipe" + recipe_ref.spdxDocument = doc.documentNamespace + recipe_ref.checksum.algorithm = "SHA1" + recipe_ref.checksum.checksumValue = doc_sha1 + + package_doc.externalDocumentRefs.append(recipe_ref) + + package_license = d.getVar("LICENSE:%s" % package) or d.getVar("LICENSE") + + spdx_package = oe.spdx.SPDXPackage() + + spdx_package.SPDXID = oe.sbom.get_package_spdxid(pkg_name) + spdx_package.name = pkg_name + spdx_package.versionInfo = d.getVar("PV") + spdx_package.licenseDeclared = convert_license_to_spdx(package_license, d) + + package_doc.packages.append(spdx_package) + + package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID)) + package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package) + + package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.zst") + with optional_tarfile(package_archive, archive_packaged) as archive: + package_files = add_package_files( + d, + package_doc, + spdx_package, + pkgdest / package, + lambda file_counter: oe.sbom.get_packaged_file_spdxid(pkg_name, file_counter), + lambda filepath: ["BINARY"], + archive=archive, + ) + + if archive is not None: + spdx_package.packageFileName = str(package_archive.name) + + add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources) + + oe.sbom.write_doc(d, package_doc, "packages") +} +# NOTE: depending on do_unpack is a hack that is necessary to get it's dependencies for archive the source +addtask do_create_spdx after do_package do_packagedata do_unpack before do_build do_rm_work + +SSTATETASKS += "do_create_spdx" +do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}" +do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}" + +python do_create_spdx_setscene () { + sstate_setscene(d) +} +addtask do_create_spdx_setscene + +do_create_spdx[dirs] = "${SPDXDEPLOY} ${SPDXWORK}" +do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}" +do_create_spdx[depends] += "${PATCHDEPENDENCY}" +do_create_spdx[deptask] = "do_create_spdx" + +def spdx_get_src(d): + """ + save patched source of the recipe in SPDX_WORKDIR. + """ + import shutil + spdx_workdir = d.getVar('SPDXWORK') + spdx_sysroot_native = d.getVar('STAGING_DIR_NATIVE') + pn = d.getVar('PN') + + workdir = d.getVar("WORKDIR") + + try: + # The kernel class functions require it to be on work-shared, so we dont change WORKDIR + if not is_work_shared(d): + # Change the WORKDIR to make do_unpack do_patch run in another dir. + d.setVar('WORKDIR', spdx_workdir) + # Restore the original path to recipe's native sysroot (it's relative to WORKDIR). + d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native) + + # The changed 'WORKDIR' also caused 'B' changed, create dir 'B' for the + # possibly requiring of the following tasks (such as some recipes's + # do_patch required 'B' existed). + bb.utils.mkdirhier(d.getVar('B')) + + bb.build.exec_func('do_unpack', d) + # Copy source of kernel to spdx_workdir + if is_work_shared(d): + d.setVar('WORKDIR', spdx_workdir) + d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native) + src_dir = spdx_workdir + "/" + d.getVar('PN')+ "-" + d.getVar('PV') + "-" + d.getVar('PR') + bb.utils.mkdirhier(src_dir) + if bb.data.inherits_class('kernel',d): + share_src = d.getVar('STAGING_KERNEL_DIR') + cmd_copy_share = "cp -rf " + share_src + "/* " + src_dir + "/" + cmd_copy_kernel_result = os.popen(cmd_copy_share).read() + bb.note("cmd_copy_kernel_result = " + cmd_copy_kernel_result) + + git_path = src_dir + "/.git" + if os.path.exists(git_path): + shutils.rmtree(git_path) + + # Make sure gcc and kernel sources are patched only once + if not (d.getVar('SRC_URI') == "" or is_work_shared(d)): + bb.build.exec_func('do_patch', d) + + # Some userland has no source. + if not os.path.exists( spdx_workdir ): + bb.utils.mkdirhier(spdx_workdir) + finally: + d.setVar("WORKDIR", workdir) + +do_rootfs[recrdeptask] += "do_create_spdx" + +ROOTFS_POSTUNINSTALL_COMMAND =+ "image_combine_spdx ; " +python image_combine_spdx() { + import os + import oe.spdx + import oe.sbom + import io + import json + from oe.rootfs import image_list_installed_packages + from datetime import timezone, datetime + from pathlib import Path + import tarfile + import bb.compress.zstd + + creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + image_name = d.getVar("IMAGE_NAME") + image_link_name = d.getVar("IMAGE_LINK_NAME") + + deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) + imgdeploydir = Path(d.getVar("IMGDEPLOYDIR")) + source_date_epoch = d.getVar("SOURCE_DATE_EPOCH") + + doc = oe.spdx.SPDXDocument() + doc.name = image_name + doc.documentNamespace = get_doc_namespace(d, doc) + doc.creationInfo.created = creation_time + doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build." + doc.creationInfo.creators.append("Tool: meta-doubleopen") + doc.creationInfo.creators.append("Organization: Double Open Project ()") + doc.creationInfo.creators.append("Person: N/A ()") + + image = oe.spdx.SPDXPackage() + image.name = d.getVar("PN") + image.versionInfo = d.getVar("PV") + image.SPDXID = oe.sbom.get_image_spdxid(image_name) + + doc.packages.append(image) + + spdx_package = oe.spdx.SPDXPackage() + + packages = image_list_installed_packages(d) + + for name in sorted(packages.keys()): + pkg_spdx_path = deploy_dir_spdx / "packages" / (name + ".spdx.json") + pkg_doc, pkg_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path) + + for p in pkg_doc.packages: + if p.name == name: + pkg_ref = oe.spdx.SPDXExternalDocumentRef() + pkg_ref.externalDocumentId = "DocumentRef-%s" % pkg_doc.name + pkg_ref.spdxDocument = pkg_doc.documentNamespace + pkg_ref.checksum.algorithm = "SHA1" + pkg_ref.checksum.checksumValue = pkg_doc_sha1 + + doc.externalDocumentRefs.append(pkg_ref) + doc.add_relationship(image, "CONTAINS", "%s:%s" % (pkg_ref.externalDocumentId, p.SPDXID)) + break + else: + bb.fatal("Unable to find package with name '%s' in SPDX file %s" % (name, pkg_spdx_path)) + + image_spdx_path = imgdeploydir / (image_name + ".spdx.json") + + with image_spdx_path.open("wb") as f: + doc.to_json(f, sort_keys=True) + + image_spdx_link = imgdeploydir / (image_link_name + ".spdx.json") + image_spdx_link.symlink_to(os.path.relpath(image_spdx_path, image_spdx_link.parent)) + + num_threads = int(d.getVar("BB_NUMBER_THREADS")) + + visited_docs = set() + + index = {"documents": []} + + spdx_tar_path = imgdeploydir / (image_name + ".spdx.tar.zst") + with bb.compress.zstd.open(spdx_tar_path, "w", num_threads=num_threads) as f: + with tarfile.open(fileobj=f, mode="w|") as tar: + def collect_spdx_document(path): + nonlocal tar + nonlocal deploy_dir_spdx + nonlocal source_date_epoch + nonlocal index + + if path in visited_docs: + return + + visited_docs.add(path) + + with path.open("rb") as f: + doc = oe.spdx.SPDXDocument.from_json(f) + f.seek(0) + + if doc.documentNamespace in visited_docs: + return + + bb.note("Adding SPDX document %s" % path) + visited_docs.add(doc.documentNamespace) + info = tar.gettarinfo(fileobj=f) + + info.name = doc.name + ".spdx.json" + info.uid = 0 + info.gid = 0 + info.uname = "root" + info.gname = "root" + + if source_date_epoch is not None and info.mtime > int(source_date_epoch): + info.mtime = int(source_date_epoch) + + tar.addfile(info, f) + + index["documents"].append({ + "filename": info.name, + "documentNamespace": doc.documentNamespace, + }) + + for ref in doc.externalDocumentRefs: + ref_path = deploy_dir_spdx / "by-namespace" / ref.spdxDocument.replace("/", "_") + collect_spdx_document(ref_path) + + collect_spdx_document(image_spdx_path) + + index["documents"].sort(key=lambda x: x["filename"]) + + index_str = io.BytesIO(json.dumps(index, sort_keys=True).encode("utf-8")) + + info = tarfile.TarInfo() + info.name = "index.json" + info.size = len(index_str.getvalue()) + info.uid = 0 + info.gid = 0 + info.uname = "root" + info.gname = "root" + + tar.addfile(info, fileobj=index_str) + + spdx_tar_link = imgdeploydir / (image_link_name + ".spdx.tar.zst") + spdx_tar_link.symlink_to(os.path.relpath(spdx_tar_path, spdx_tar_link.parent)) +} + diff --git a/meta/lib/oe/sbom.py b/meta/lib/oe/sbom.py new file mode 100644 index 0000000000..d40e5b792f --- /dev/null +++ b/meta/lib/oe/sbom.py @@ -0,0 +1,63 @@ +# +# SPDX-License-Identifier: GPL-2.0-only +# + +import collections + +DepRecipe = collections.namedtuple("DepRecipe", ("doc", "doc_sha1", "recipe")) +DepSource = collections.namedtuple("DepSource", ("doc", "doc_sha1", "recipe", "file")) + + +def get_recipe_spdxid(d): + return "SPDXRef-%s-%s" % ("Recipe", d.getVar("PN")) + + +def get_package_spdxid(pkg): + return "SPDXRef-Package-%s" % pkg + + +def get_source_file_spdxid(d, idx): + return "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), idx) + + +def get_packaged_file_spdxid(pkg, idx): + return "SPDXRef-PackagedFile-%s-%d" % (pkg, idx) + + +def get_image_spdxid(img): + return "SPDXRef-Image-%s" % img + + +def write_doc(d, spdx_doc, subdir): + from pathlib import Path + + spdx_deploy = Path(d.getVar("SPDXDEPLOY")) + + dest = spdx_deploy / subdir / (spdx_doc.name + ".spdx.json") + dest.parent.mkdir(exist_ok=True, parents=True) + with dest.open("wb") as f: + doc_sha1 = spdx_doc.to_json(f, sort_keys=True) + + l = spdx_deploy / "by-namespace" / spdx_doc.documentNamespace.replace("/", "_") + l.parent.mkdir(exist_ok=True, parents=True) + l.symlink_to(os.path.relpath(dest, l.parent)) + + return doc_sha1 + + +def read_doc(filename): + import hashlib + import oe.spdx + + with filename.open("rb") as f: + sha1 = hashlib.sha1() + while True: + chunk = f.read(4096) + if not chunk: + break + sha1.update(chunk) + + f.seek(0) + doc = oe.spdx.SPDXDocument.from_json(f) + + return (doc, sha1.hexdigest()) diff --git a/meta/lib/oe/spdx.py b/meta/lib/oe/spdx.py new file mode 100644 index 0000000000..3f569c6862 --- /dev/null +++ b/meta/lib/oe/spdx.py @@ -0,0 +1,263 @@ +# +# SPDX-License-Identifier: GPL-2.0-only +# + +import hashlib +import itertools +import json + +SPDX_VERSION = "2.2" + + +class _Property(object): + def __init__(self, *, default=None): + self.default = default + + def setdefault(self, dest, name): + if self.default is not None: + dest.setdefault(name, self.default) + + +class _String(_Property): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def set_property(self, attrs, name): + def get_helper(obj): + return obj._spdx[name] + + def set_helper(obj, value): + obj._spdx[name] = value + + def del_helper(obj): + del obj._spdx[name] + + attrs[name] = property(get_helper, set_helper, del_helper) + + def init(self, source): + return source + + +class _Object(_Property): + def __init__(self, cls, **kwargs): + super().__init__(**kwargs) + self.cls = cls + + def set_property(self, attrs, name): + def get_helper(obj): + if not name in obj._spdx: + obj._spdx[name] = self.cls() + return obj._spdx[name] + + def set_helper(obj, value): + obj._spdx[name] = value + + def del_helper(obj): + del obj._spdx[name] + + attrs[name] = property(get_helper, set_helper) + + def init(self, source): + return self.cls(**source) + + +class _ListProperty(_Property): + def __init__(self, prop, **kwargs): + super().__init__(**kwargs) + self.prop = prop + + def set_property(self, attrs, name): + def get_helper(obj): + if not name in obj._spdx: + obj._spdx[name] = [] + return obj._spdx[name] + + def del_helper(obj): + del obj._spdx[name] + + attrs[name] = property(get_helper, None, del_helper) + + def init(self, source): + return [self.prop.init(o) for o in source] + + +class _StringList(_ListProperty): + def __init__(self, **kwargs): + super().__init__(_String(), **kwargs) + + +class _ObjectList(_ListProperty): + def __init__(self, cls, **kwargs): + super().__init__(_Object(cls), **kwargs) + + +class MetaSPDXObject(type): + def __new__(mcls, name, bases, attrs): + attrs["_properties"] = {} + + for key in attrs.keys(): + if isinstance(attrs[key], _Property): + prop = attrs[key] + attrs["_properties"][key] = prop + prop.set_property(attrs, key) + + return super().__new__(mcls, name, bases, attrs) + + +class SPDXObject(metaclass=MetaSPDXObject): + def __init__(self, **d): + self._spdx = {} + + for name, prop in self._properties.items(): + prop.setdefault(self._spdx, name) + if name in d: + self._spdx[name] = prop.init(d[name]) + + def serializer(self): + return self._spdx + + def __setattr__(self, name, value): + if name in self._properties or name == "_spdx": + super().__setattr__(name, value) + return + raise KeyError("%r is not a valid SPDX property" % name) + + +class SPDXChecksum(SPDXObject): + algorithm = _String() + checksumValue = _String() + + +class SPDXRelationship(SPDXObject): + spdxElementId = _String() + relatedSpdxElement = _String() + relationshipType = _String() + comment = _String() + + +class SPDXExternalReference(SPDXObject): + referenceCategory = _String() + referenceType = _String() + referenceLocator = _String() + + +class SPDXPackageVerificationCode(SPDXObject): + packageVerificationCodeValue = _String() + packageVerificationCodeExcludedFiles = _StringList() + + +class SPDXPackage(SPDXObject): + name = _String() + SPDXID = _String() + versionInfo = _String() + downloadLocation = _String(default="NOASSERTION") + packageSupplier = _String(default="NOASSERTION") + homepage = _String() + licenseConcluded = _String(default="NOASSERTION") + licenseDeclared = _String(default="NOASSERTION") + summary = _String() + description = _String() + sourceInfo = _String() + copyrightText = _String(default="NOASSERTION") + licenseInfoFromFiles = _StringList(default=["NOASSERTION"]) + externalRefs = _ObjectList(SPDXExternalReference) + packageVerificationCode = _Object(SPDXPackageVerificationCode) + hasFiles = _StringList() + packageFileName = _String() + + +class SPDXFile(SPDXObject): + SPDXID = _String() + fileName = _String() + licenseConcluded = _String(default="NOASSERTION") + copyrightText = _String(default="NOASSERTION") + licenseInfoInFiles = _StringList(default=["NOASSERTION"]) + checksums = _ObjectList(SPDXChecksum) + fileTypes = _StringList() + + +class SPDXCreationInfo(SPDXObject): + created = _String() + licenseListVersion = _String() + comment = _String() + creators = _StringList() + + +class SPDXExternalDocumentRef(SPDXObject): + externalDocumentId = _String() + spdxDocument = _String() + checksum = _Object(SPDXChecksum) + + +class SPDXDocument(SPDXObject): + spdxVersion = _String(default="SPDX-" + SPDX_VERSION) + dataLicense = _String(default="CC0-1.0") + SPDXID = _String(default="SPDXRef-DOCUMENT") + name = _String() + documentNamespace = _String() + creationInfo = _Object(SPDXCreationInfo) + packages = _ObjectList(SPDXPackage) + files = _ObjectList(SPDXFile) + relationships = _ObjectList(SPDXRelationship) + externalDocumentRefs = _ObjectList(SPDXExternalDocumentRef) + + def __init__(self, **d): + super().__init__(**d) + + def to_json(self, f, *, sort_keys=False, indent=None, separators=None): + class Encoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, SPDXObject): + return o.serializer() + + return super().default(o) + + sha1 = hashlib.sha1() + for chunk in Encoder( + sort_keys=sort_keys, + indent=indent, + separators=separators, + ).iterencode(self): + chunk = chunk.encode("utf-8") + f.write(chunk) + sha1.update(chunk) + + return sha1.hexdigest() + + @classmethod + def from_json(cls, f): + return cls(**json.load(f)) + + def add_relationship(self, _from, relationship, _to, *, comment=None): + if isinstance(_from, SPDXObject): + from_spdxid = _from.SPDXID + else: + from_spdxid = _from + + if isinstance(_to, SPDXObject): + to_spdxid = _to.SPDXID + else: + to_spdxid = _to + + r = SPDXRelationship( + spdxElementId=from_spdxid, + relatedSpdxElement=to_spdxid, + relationshipType=relationship, + ) + + if comment is not None: + r.comment = comment + + self.relationships.append(r) + + def find_by_spdxid(self, spdxid): + for o in itertools.chain(self.packages, self.files): + if o.SPDXID == spdxid: + return o + return None + + def find_external_document_ref(self, namespace): + for r in self.externalDocumentRefs: + if r.spdxDocument == namespace: + return r + return None -- cgit 1.2.3-korg