svn commit: r360877 - head/tools/pkgbase

Ed Maste emaste at FreeBSD.org
Sun May 10 16:11:20 UTC 2020


Author: emaste
Date: Sun May 10 16:11:19 2020
New Revision: 360877
URL: https://svnweb.freebsd.org/changeset/base/360877

Log:
  Add pkgbase METALOG parse/check tool
  
  `metalog.lua` is a script that reads METALOG file created by pkgbase
  (make packages) and generates reports about the installed system
  and issues.
  
  This was developed as part of Yang's W2020 University of Waterloo co-
  operative education term with the FreeBSD Foundation.  kevans provided
  some initial review; we will iterate on it in the tree.
  
  Submitted by:	Yang Wang <2333 at outlook.jp>
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D24563

Added:
  head/tools/pkgbase/
  head/tools/pkgbase/README.md   (contents, props changed)
  head/tools/pkgbase/metalog_reader.lua   (contents, props changed)

Added: head/tools/pkgbase/README.md
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/pkgbase/README.md	Sun May 10 16:11:19 2020	(r360877)
@@ -0,0 +1,61 @@
+`metalog.lua` is a script that reads METALOG file created by pkgbase
+(make packages) and generates reports about the installed system
+and issues
+
+the script accepts an mtree file in a format that's returned by
+`mtree -c | mtree -C`
+
+synopsis:
+```
+metalog_reader.lua [-h] [-a | -c | -p [-count] [-size] [-f...]] [-W...] [-v] metalog-path
+```
+
+options:
+
+* `-a` prints all scan results. this is the default option if no option is
+  provided.
+* `-c` lints the file and gives warnings/errors, including duplication and
+  conflicting metadata
+  *  `-Wcheck-notagdir` entries with dir type and no tags will be also included
+     the first time they appear (1)
+* `-p` list all package names found in the file as exactly specified by
+  `tags=package=...`
+  * `-count` display the number of files of the package
+  * `-size` display the size of the package
+  * `-fsetgid` only include packages with setgid files
+  * `-fsetuid` only include packages with setuid files
+  * `-fsetid` only include packages with setgid or setuid files
+* `-v` verbose mode
+* `-h` help page
+
+some examples:
+
+* `metalog_reader.lua -a METALOG`
+  prints all scan results described below. this is the default option
+* `metalog_reader.lua -c METALOG`
+  only prints errors and warnings found in the file
+* `metalog_reader.lua -c -Wcheck-notagdir METALOG`
+  prints errors and warnings found in the file, including directories with no
+  tags
+* `metalog_reader.lua -p METALOG`
+  only prints all the package names found in the file
+* `metalog_reader.lua -p -count -size METALOG`
+  prints all the package names, followed by number of files, followed by total
+  size
+* `metalog_reader.lua -p -size -fsetid METALOG`
+  prints packages that has either setuid/setgid files, followed by the total
+  size
+* `metalog_reader.lua -p -fsetuid -fsetgid METALOG`
+  prints packages that has both setuid and setgid files (if more than one
+  filters are specified, they are composed using logic and)
+* `metalog_reader.lua -p -count -size -fsetuid METALOG`
+  prints packages that has setuid files, followed by number of files and total
+  size
+
+(1) if we have two entries
+```
+./bin type=dir uname=root gname=wheel mode=0755
+./bin type=dir uname=root gname=wheel mode=0755 tags=...
+```
+by default, this is not warned. if the option is enabled, this will be warned
+as the second line sufficiently covers the first line.

Added: head/tools/pkgbase/metalog_reader.lua
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/pkgbase/metalog_reader.lua	Sun May 10 16:11:19 2020	(r360877)
@@ -0,0 +1,521 @@
+#!/usr/libexec/flua
+
+-- SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+--
+-- Copyright(c) 2020 The FreeBSD Foundation.
+--
+-- Redistribution and use in source and binary forms, with or without
+-- modification, are permitted provided that the following conditions
+-- are met:
+-- 1. Redistributions of source code must retain the above copyright
+--    notice, this list of conditions and the following disclaimer.
+-- 2. Redistributions in binary form must reproduce the above copyright
+--    notice, this list of conditions and the following disclaimer in the
+--    documentation and/or other materials provided with the distribution.
+--
+-- THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+-- ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+-- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+-- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+-- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+-- SUCH DAMAGE.
+
+-- $FreeBSD$
+
+function main(args)
+	if #args == 0 then usage() end
+	local filename
+	local printall, checkonly, pkgonly =
+	    #args == 1, false, false
+	local dcount, dsize, fuid, fgid, fid =
+	    false, false, false, false, false
+	local verbose = false
+	local w_notagdirs = false
+
+	local i = 1
+	while i <= #args do
+		if args[i] == '-h' then
+			usage(true)
+		elseif args[i] == '-a' then
+			printall = true
+		elseif args[i] == '-c' then
+			printall = false
+			checkonly = true
+		elseif args[i] == '-p' then
+			printall = false
+			pkgonly = true
+			while i < #args do
+				i = i+1
+				if args[i] == '-count' then
+					dcount = true
+				elseif args[i] == '-size' then
+					dsize = true
+				elseif args[i] == '-fsetuid' then
+					fuid = true
+				elseif args[i] == '-fsetgid' then
+					fgid = true
+				elseif args[i] == '-fsetid' then
+					fid = true
+				else
+					i = i-1
+					break
+				end
+			end
+		elseif args[i] == '-v' then
+			verbose = true
+		elseif args[i] == '-Wcheck-notagdir' then
+			w_notagdirs = true
+		elseif args[i]:match('^%-') then
+			io.stderr:write('Unknown argument '..args[i]..'.\n')
+			usage()
+		else
+			filename = args[i]
+		end
+		i = i+1
+	end
+
+	if filename == nil then
+		io.stderr:write('Missing filename.\n')
+		usage()
+	end
+
+	local sess = Analysis_session(filename, verbose, w_notagdirs)
+
+	if printall then
+		io.write('--- PACKAGE REPORTS ---\n')
+		io.write(sess.pkg_report_full())
+		io.write('--- LINTING REPORTS ---\n')
+		print_lints(sess)
+	elseif checkonly then
+		print_lints(sess)
+	elseif pkgonly then
+		io.write(sess.pkg_report_simple(dcount, dsize, {
+			fuid and sess.pkg_issetuid or nil,
+			fgid and sess.pkg_issetgid or nil,
+			fid and sess.pkg_issetid or nil
+		}))
+	else
+		io.stderr:write('This text should not be displayed.')
+		usage()
+	end
+end
+
+--- @param man boolean
+function usage(man)
+	local sn = 'Usage: '..arg[0].. ' [-h] [-a | -c | -p [-count] [-size] [-f...]] [-W...] metalog-path \n'
+	if man then
+		io.write('\n')
+		io.write(sn)
+		io.write(
+[[
+
+The script reads METALOG file created by pkgbase (make packages) and generates
+reports about the installed system and issues.  It accepts an mtree file in a
+format that's returned by `mtree -c | mtree -C`
+
+  Options:
+  -a         prints all scan results. this is the default option if no option
+             is provided.
+  -c         lints the file and gives warnings/errors, including duplication
+             and conflicting metadata
+      -Wcheck-notagdir    entries with dir type and no tags will be also
+                          included the first time they appear
+  -p         list all package names found in the file as exactly specified by
+             `tags=package=...`
+      -count       display the number of files of the package
+      -size        display the size of the package
+      -fsetgid     only include packages with setgid files
+      -fsetuid     only include packages with setuid files
+      -fsetid      only include packages with setgid or setuid files
+  -v          verbose mode
+  -h          help page
+
+]])
+		os.exit()
+	else
+		io.stderr:write(sn)
+		os.exit(1)
+	end
+end
+
+--- @param sess Analysis_session
+function print_lints(sess)
+	local dupwarn, duperr = sess.dup_report()
+	io.write(dupwarn)
+	io.write(duperr)
+	local inodewarn, inodeerr = sess.inode_report()
+	io.write(inodewarn)
+	io.write(inodeerr)
+end
+
+--- @param t table
+function sortedPairs(t)
+	local sortedk = {}
+	for k in next, t do sortedk[#sortedk+1] = k end
+	table.sort(sortedk)
+	local i = 0
+	return function()
+		i = i + 1
+		return sortedk[i], t[sortedk[i]]
+	end
+end
+
+--- @param t table <T, U>
+--- @param f function <U -> U>
+function table_map(t, f)
+	local res = {}
+	for k, v in pairs(t) do res[k] = f(v) end
+	return res
+end
+
+--- @class MetalogRow
+-- a table contaning file's info, from a line content from METALOG file
+-- all fields in the table are strings
+-- sample output:
+--	{
+--		filename = ./usr/share/man/man3/inet6_rthdr_segments.3.gz
+--		lineno = 5
+--		attrs = {
+--			gname = 'wheel'
+--			uname = 'root'
+--			mode = '0444'
+--			size = '1166'
+--			time = nil
+--			type = 'file'
+--			tags = 'package=clibs,debug'
+--		}
+--	}
+--- @param line string
+function MetalogRow(line, lineno)
+	local res, attrs = {}, {}
+	local filename, rest = line:match('^(%S+) (.+)$')
+	-- mtree file has space escaped as '\\040', not affecting splitting
+	-- string by space
+	for attrpair in rest:gmatch('[^ ]+') do
+		local k, v = attrpair:match('^(.-)=(.+)')
+		attrs[k] = v
+	end
+	res.filename = filename
+	res.linenum = lineno
+	res.attrs = attrs
+	return res
+end
+
+-- check if an array of MetalogRows are equivalent. if not, the first field
+-- that's different is returned secondly
+--- @param rows MetalogRow[]
+--- @param ignore_name boolean
+--- @param ignore_tags boolean
+function metalogrows_all_equal(rows, ignore_name, ignore_tags)
+	local __eq = function(l, o)
+		if not ignore_name and l.filename ~= o.filename then
+			return false, 'filename'
+		end
+		-- ignoring linenum in METALOG file as it's not relavant
+		for k in pairs(l.attrs) do
+			if ignore_tags and k == 'tags' then goto continue end
+			if l.attrs[k] ~= o.attrs[k] and o.attrs[k] ~= nil then
+				return false, k
+			end
+			::continue::
+		end
+		return true
+	end
+	for _, v in ipairs(rows) do
+		local bol, offby = __eq(v, rows[1])
+		if not bol then return false, offby end
+	end
+	return true
+end
+
+--- @param tagstr string
+function pkgname_from_tag(tagstr)
+	local ext, pkgname, pkgend = '', '', ''
+	for seg in tagstr:gmatch('[^,]+') do
+		if seg:match('package=') then
+			pkgname = seg:sub(9)
+		elseif seg == 'development' or seg == 'profile'
+			or seg == 'debug' or seg == 'docs' then
+			pkgend = seg
+		else
+			ext = ext == '' and seg or ext..'-'..seg
+		end
+	end
+	pkgname = pkgname
+		..(ext == '' and '' or '-'..ext)
+		..(pkgend == '' and '' or '-'..pkgend)
+	return pkgname
+end
+
+--- @class Analysis_session
+--- @param metalog string
+--- @param verbose boolean
+--- @param w_notagdirs boolean turn on to also check directories
+function Analysis_session(metalog, verbose, w_notagdirs)
+	local files = {} -- map<string, MetalogRow[]>
+	-- set is map<elem, bool>. if bool is true then elem exists
+	local pkgs = {} -- map<string, set<string>>
+	----- used to keep track of files not belonging to a pkg. not used so
+	----- it is commented with -----
+	-----local nopkg = {} --            set<string>
+	--- @public
+	local swarn = {}
+	--- @public
+	local serrs = {}
+
+	-- returns number of files in package and size of package
+	-- nil is  returned upon errors
+	--- @param pkgname string
+	local function pkg_size(pkgname)
+		local filecount, sz = 0, 0
+		for filename in pairs(pkgs[pkgname]) do
+			local rows = files[filename]
+			-- normally, there should be only one row per filename
+			-- if these rows are equal, there should be warning, but it
+			-- does not affect size counting. if not, it is an error
+			if #rows > 1 and not metalogrows_all_equal(rows) then
+				return nil
+			end
+			local row = rows[1]
+			if row.attrs.type == 'file' then
+				sz = sz + tonumber(row.attrs.size)
+			end
+			filecount = filecount + 1
+		end
+		return filecount, sz
+	end
+
+	--- @param pkgname string
+	--- @param mode number
+	local function pkg_ismode(pkgname, mode)
+		for filename in pairs(pkgs[pkgname]) do
+			for _, row in ipairs(files[filename]) do
+				if tonumber(row.attrs.mode, 8) & mode ~= 0 then
+					return true
+				end
+			end
+		end
+		return false
+	end
+
+	--- @param pkgname string
+	--- @public
+	local function pkg_issetuid(pkgname)
+		return pkg_ismode(pkgname, 2048)
+	end
+
+	--- @param pkgname string
+	--- @public
+	local function pkg_issetgid(pkgname)
+		return pkg_ismode(pkgname, 1024)
+	end
+
+	--- @param pkgname string
+	--- @public
+	local function pkg_issetid(pkgname)
+		return pkg_issetuid(pkgname) or pkg_issetgid(pkgname)
+	end
+
+	-- sample return:
+	-- { [*string]: { count=1, size=2, issetuid=true, issetgid=true } }
+	local function pkg_report_helper_table()
+		local res = {}
+		for pkgname in pairs(pkgs) do
+			res[pkgname] = {}
+			res[pkgname].count,
+			res[pkgname].size = pkg_size(pkgname)
+			res[pkgname].issetuid = pkg_issetuid(pkgname)
+			res[pkgname].issetgid = pkg_issetgid(pkgname)
+		end
+		return res
+	end
+
+	-- returns a string describing package scan report
+	--- @public
+	local function pkg_report_full()
+		local sb = {}
+		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
+			sb[#sb+1] = 'Package '..pkgname..':'
+			if v.issetuid or v.issetgid then
+				sb[#sb+1] = ''..table.concat({
+					v.issetuid and ' setuid' or '',
+					v.issetgid and ' setgid' or '' }, '')
+			end
+			sb[#sb+1] = '\n  number of files: '..(v.count or '?')
+				..'\n  total size: '..(v.size or '?')
+			sb[#sb+1] = '\n'
+		end
+		return table.concat(sb, '')
+	end
+
+	--- @param have_count boolean
+	--- @param have_size boolean
+	--- @param filters function[]
+	--- @public
+	-- returns a string describing package size report.
+	-- sample: "mypackage 2 2048"* if both booleans are true
+	local function pkg_report_simple(have_count, have_size, filters)
+		filters = filters or {}
+		local sb = {}
+		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
+			local pred = true
+			-- doing a foldl to all the function results with (and)
+			for _, f in pairs(filters) do pred = pred and f(pkgname) end
+			if pred then
+				sb[#sb+1] = pkgname..table.concat({
+					have_count and (' '..(v.count or '?')) or '',
+					have_size and (' '..(v.size or '?')) or ''}, '')
+					..'\n'
+			end
+		end
+		return table.concat(sb, '')
+	end
+
+	-- returns a string describing duplicate file warnings,
+	-- returns a string describing duplicate file errors
+	--- @public
+	local function dup_report()
+		local warn, errs = {}, {}
+		for filename, rows in sortedPairs(files) do
+			if #rows == 1 then goto continue end
+			local iseq, offby = metalogrows_all_equal(rows)
+			if iseq then -- repeated line, just a warning
+				warn[#warn+1] = 'warning: '..filename
+					..' repeated with same meta: line '
+					..table.concat(
+						table_map(rows, function(e) return e.linenum end), ',')
+				warn[#warn+1] = '\n'
+			elseif not metalogrows_all_equal(rows, false, true) then
+			-- same filename (possibly different tags), different metadata, an error
+				errs[#errs+1] = 'error: '..filename
+					..' exists in multiple locations and with different meta: line '
+					..table.concat(
+						table_map(rows, function(e) return e.linenum end), ',')
+					..'. off by "'..offby..'"'
+				errs[#errs+1] = '\n'
+			end
+			::continue::
+		end
+		return table.concat(warn, ''), table.concat(errs, '')
+	end
+
+	-- returns a string describing warnings of found hard links
+	-- returns a string describing errors of found hard links
+	--- @public
+	local function inode_report()
+		-- obtain inodes of filenames
+		local attributes = require('lfs').attributes
+		local inm = {} -- map<number, string[]>
+		local unstatables = {} -- string[]
+		for filename in pairs(files) do
+			-- i only took the first row of a filename,
+			-- and skip links and folders
+			if files[filename][1].attrs.type ~= 'file' then
+				goto continue
+			end
+			-- make ./xxx become /xxx so that we can stat
+			filename = filename:sub(2)
+			local fs = attributes(filename)
+			if fs == nil then
+				unstatables[#unstatables+1] = filename
+				goto continue
+			end
+			local inode = fs.ino
+			inm[inode] = inm[inode] or {}
+			-- add back the dot prefix
+			table.insert(inm[inode], '.'..filename)
+			::continue::
+		end
+
+		local warn, errs = {}, {}
+		for _, filenames in pairs(inm) do
+			if #filenames == 1 then goto continue end
+			-- i only took the first row of a filename
+			local rows = table_map(filenames, function(e)
+				return files[e][1]
+			end)
+			local iseq, offby = metalogrows_all_equal(rows, true, true)
+			if not iseq then
+				errs[#errs+1] = 'error: '
+					..'entries point to the same inode but have different meta: '
+					..table.concat(filenames, ',')..' in line '
+					..table.concat(
+						table_map(rows, function(e) return e.linenum end), ',')
+					..'. off by "'..offby..'"'
+				errs[#errs+1] = '\n'
+			end
+			::continue::
+		end
+
+		if #unstatables > 0 then
+			warn[#warn+1] = verbose and
+				'note: skipped checking inodes: '..table.concat(unstatables, ',')..'\n'
+				or
+				'note: skipped checking inodes for '..#unstatables..' entries\n'
+		end
+
+		return table.concat(warn, ''), table.concat(errs, '')
+	end
+
+	do
+	local fp, errmsg, errcode = io.open(metalog, 'r')
+	if fp == nil then
+		io.stderr:write('cannot open '..metalog..': '..errmsg..': '..errcode..'\n')
+		os.exit(1)
+	end
+
+	-- scan all lines and put file data into the dictionaries
+	local firsttimes = {} -- set<string>
+	local lineno = 0
+	for line in fp:lines() do
+		-----local isinpkg = false
+		lineno = lineno + 1
+		-- skip lines begining with #
+		if line:match('^%s*#') then goto continue end
+		-- skip blank lines
+		if line:match('^%s*$') then goto continue end
+
+		local data = MetalogRow(line, lineno)
+		-- entries with dir and no tags... ignore for the first time
+		if not w_notagdirs and
+			data.attrs.tags == nil and data.attrs.type == 'dir'
+			and not firsttimes[data.filename] then
+			firsttimes[data.filename] = true
+			goto continue
+		end
+
+		files[data.filename] = files[data.filename] or {}
+		table.insert(files[data.filename], data)
+
+		if data.attrs.tags ~= nil then
+			pkgname = pkgname_from_tag(data.attrs.tags)
+			pkgs[pkgname] = pkgs[pkgname] or {}
+			pkgs[pkgname][data.filename] = true
+			------isinpkg = true
+		end
+		-----if not isinpkg then nopkg[data.filename] = true end
+		::continue::
+	end
+
+	fp:close()
+	end
+
+	return {
+		warn = swarn,
+		errs = serrs,
+		pkg_issetuid = pkg_issetuid,
+		pkg_issetgid = pkg_issetgid,
+		pkg_issetid = pkg_issetid,
+		pkg_report_full = pkg_report_full,
+		pkg_report_simple = pkg_report_simple,
+		dup_report = dup_report,
+		inode_report = inode_report
+	}
+end
+
+main(arg)


More information about the svn-src-head mailing list