# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
#==============================================================================
# ===============
# ceph_support.py
# ===============
# Utility functions for ceph_* tools
# Copyright (C) 2017, 2018 Peter Linich
#==============================================================================
# =======
# License
# =======
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#==============================================================================
# ===================================================================
# ParseXML - scans a buffer of simple tag markup and flattens it into
# a dictionary that maps the nested path of each tag to the text found
# directly inside it. For example:
#
#   ParseXML ("<html><head><title>My title</title></head><body>"
#             "<p>Text <b>1</b></p><h1>TITLE</h1><p>Text 2</p>"
#             "</body></html>")
#
# gives:
#
#   {'.html.0.head.0.title.0': 'My title',
#    '.html.0.body.0.p.0': 'Text ',
#    '.html.0.body.0.p.0.b.0': '1',
#    '.html.0.body.0.h1.0': 'TITLE',
#    '.html.0.body.0.p.1': 'Text 2'}
#
# Note that each dictionary key represents the nested path to a tag.
# Each tag component is followed by its index at its nesting level.
# This can be seen where the two <p> tags at the same level are shown
# with keys of "p.0" and "p.1"
# ===================================================================
class ParseXMLError(Exception):
    """Base class for all errors raised by ParseXML."""
    pass


class ParseXMLInputError(ParseXMLError):
    """Raised when the buffer handed to ParseXML is malformed."""
    pass


def ParseXML(buffer):
    """Flatten simple tag markup into a {path: text} dictionary.

    Everything between "<" and ">" is taken verbatim as the tag name, so
    attributes, self-closing tags, comments and entities get no special
    treatment. Paths that never receive text are not stored. Text found
    outside every tag is stored under the empty-string key, and a later
    run of text at the same path replaces the earlier one.

    Args:
        buffer: String holding the markup to scan.

    Returns:
        Dictionary keyed by tag path (for example ".html.0.body.0.p.1"),
        where each tag name is followed by its zero-based index among the
        identically named tags under the same parent path.

    Raises:
        ParseXMLInputError: If the buffer ends inside a tag, a tag name is
            empty, a close tag does not match the innermost open tag, or
            tags are still open at the end of the buffer.
    """
    values = {}     # Output product: path -> text
    counts = {}     # Un-indexed path -> highest index handed out so far
    tags = []       # Stack of currently open tag names
    paths = [""]    # Stack of indexed paths; the bottom entry is the root

    i = 0
    n = len(buffer)

    while i < n:
        # ---------------------------------------------------------
        # A value string starts with any character other than a "<"
        # and ends at either a "<" or at the end of the buffer
        # ---------------------------------------------------------
        if buffer[i] != "<":
            lt = buffer.find("<", i)
            if lt < 0:
                values[paths[-1]] = buffer[i:]
                break
            values[paths[-1]] = buffer[i:lt]
            i = lt

        # -----------------------------------------------
        # buffer[i] is a "<" here. Step over it and check
        # for the optional slash that marks a close tag
        # -----------------------------------------------
        i += 1
        if i >= n:
            raise ParseXMLInputError("End of buffer immediately after \"<\"")

        slash = buffer[i] == "/"
        if slash:
            i += 1
            if i >= n:
                raise ParseXMLInputError("End of buffer immediately after \"</\"")

        # ----------------------------------------------
        # The tag name is everything up to the next ">"
        # ----------------------------------------------
        gt = buffer.find(">", i)
        if gt < 0:
            raise ParseXMLInputError("End of buffer while looking for \">\" after tag name: \"" + buffer[i:] + "\"")
        t = buffer[i:gt]
        i = gt + 1

        if t == "":
            raise ParseXMLInputError("Got zero-length tag name")

        if slash:
            # ---------------------------------------------
            # Close tag: must match the innermost open tag
            # ---------------------------------------------
            if not tags:
                raise ParseXMLInputError("Close without open of tag: </" + t + ">")
            if tags[-1] != t:
                raise ParseXMLInputError("Expected close of tag <" + tags[-1] + "> but got close of <" + t + "> instead")
            tags.pop()
            paths.pop()
        else:
            # --------------------------------------------------
            # Open tag: work out its index among same-named tags
            # under the current path, then push it
            # --------------------------------------------------
            p = paths[-1] + "." + t
            j = counts.get(p, -1) + 1
            counts[p] = j
            tags.append(t)
            paths.append(p + "." + str(j))

    # ----------------------------------------------
    # Sanity check - there must be no dangling open
    # tags once the whole buffer has been consumed
    # ----------------------------------------------
    if tags:
        raise ParseXMLInputError("Unclosed tags:" + "".join(" <" + t + ">" for t in tags))

    return values

#==============================================================================
# ===================================================================
# PrintColumnised - takes a list/array of rows, each row itself being
# a list of column cell contents (strings), calculates the maximum
# width of each column (in characters), then prints the rows out as a
# table with each cell left-justified and right-padded to get a
# nicely-formatted table. The first row is the "control" row which
# determines the number of columns the function tries to print. It
# maybe should be the column headings
# ===================================================================
def PrintColumnised(rows):
    """Print rows of string cells as a left-justified table on stdout.

    Every column except the last is right-padded with spaces to the width
    of its widest cell and followed by the column separator. Cells beyond
    the number of columns in the first row are ignored. An empty list of
    rows prints nothing.

    Args:
        rows: List of rows, each a list of strings. The first row fixes
            the number of columns printed.

    Raises:
        IndexError: If a row has fewer cells than the first row.
        SystemExit: With status 0 after an IOError while writing, or with
            status 1 after any other error while writing. In both cases
            the error is reported on stderr first.
    """
    # Imported locally so this function does not rely on a module-level
    # import being present
    import sys

    # ---------------------
    # Space between columns
    # ---------------------
    columnsep = " "

    # Nothing to print, and no control row to take the column count from
    if not rows:
        return

    # ---------------------------------------------
    # The first row controls how many columns we
    # output. Each column is as wide as its widest
    # cell across all rows
    # ---------------------------------------------
    columns = len(rows[0])
    widths = [max(len(row[i]) for row in rows) for i in range(columns)]

    try:
        for row in rows:
            cells = [row[i] for i in range(columns)]
            # The last column is left unpadded to avoid trailing spaces
            padded = [cell.ljust(width) for cell, width in zip(cells[:-1], widths)]
            sys.stdout.write(columnsep.join(padded + cells[-1:]) + "\n")

    # -----------------------
    # Handle error exceptions
    # -----------------------
    except IOError as e:
        sys.stderr.write("{0}\n".format(e.strerror))
        sys.exit(0)

    except Exception as e:
        sys.stderr.write("{0}\n".format(e))
        sys.exit(1)

#==============================================================================
# ==========================================
# SizeStr - convert a size representation in
# kilobytes into something more tractable
# ==========================================
def SizeStr(kbstr):
    """Format a size given in kilobytes as megabytes or gigabytes.

    Args:
        kbstr: Size in kilobytes, as a string or number accepted by float().

    Returns:
        The size in gigabytes with one decimal place and a "G" suffix when
        it is larger than 5 GB, otherwise the size in whole megabytes with
        an "M" suffix.

    Raises:
        ValueError: If kbstr cannot be converted by float().
    """
    MB = 1024           # Kilobytes per megabyte
    GB = 1048576        # Kilobytes per gigabyte
    MB_MAX = GB * 5     # Largest size still reported in megabytes

    kb = float(kbstr)
    if kb > MB_MAX:
        return "{0:.1f}G".format(kb / GB)
    return "{0:.0f}M".format(kb / MB)

#==============================================================================