CloverBootloader/BaseTools/Tests/CheckUnicodeSourceFiles.py

## @file
#  Unit tests for AutoGen.UniClassObject
#
#  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
#
#  SPDX-License-Identifier: BSD-2-Clause-Patent
#

##
# Import Modules
#
import os
import unittest

import codecs

import TestTools

from Common.Misc import PathClass
import AutoGen.UniClassObject as BtUni

from Common import EdkLogger
EdkLogger.InitializeForUnitTest()

class Tests(TestTools.BaseToolsTest):
    """Checks which .uni source-file encodings BtUni.UniFileClassObject accepts.

    Each test writes a small .uni file into the test's temporary area and
    verifies that constructing BtUni.UniFileClassObject on it either
    succeeds (valid data) or raises UnicodeError / EdkLogger.FatalError
    (data that must be rejected).

    Note: the test methods deliberately use '#' comments instead of
    docstrings, because unittest prints a test's docstring in place of its
    name in verbose output.
    """

    # Minimal valid .uni content used when a test supplies no data itself.
    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        """Write test data to 'input.uni' and return its path.

        encoding -- codec name passed to codecs.encode(), or None to write
                    'string' unchanged (used for pre-encoded byte strings).
        string   -- data to write; defaults to self.SampleData.

        Returns a PathClass wrapping the temporary file's path.
        """
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            data = string
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        """Fail the test with a message about 'error' for 'encoding' data.

        The message reads "<error> should be generated for ..." when the
        data was expected to be rejected, and "<error> should not be
        generated for ..." when it was expected to be accepted.
        """
        msg = error + ' should '
        if shouldPass:
            msg += 'not '
        msg += 'be generated for '
        msg += '%s data in a .uni file' % encoding
        self.fail(msg)

    def UnicodeErrorFailure(self, encoding, shouldPass):
        """Fail the test, naming UnicodeError as the offending error."""
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        """Fail the test, naming EdkLogger.FatalError as the offending error."""
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        """Write the data to a .uni file and check how it is handled.

        encoding   -- codec used to encode 'string', or None if 'string'
                      is already the exact data to write.
        shouldPass -- True if BtUni.UniFileClassObject must accept the
                      file; False if it must raise UnicodeError or
                      EdkLogger.FatalError.
        string     -- data to test; defaults to self.SampleData.

        Any other exception type fails the test and is reported as such,
        so that an unrelated crash is not mistaken for a (missing or
        unexpected) encoding error.
        """
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
        except Exception as unexpected:
            # Neither of the two recognised rejection errors: surface the
            # real exception instead of hiding it behind a generic message.
            self.fail('Unexpected %s (%r) raised for %s data in a .uni file'
                      % (type(unexpected).__name__, unexpected, encoding))
        else:
            # No exception at all: only a failure if rejection was expected.
            if not shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. But, a Surrogate
        # Pair code point which is not followed by another Surrogate
        # Pair code point might be interpreted as a single code point
        # with the Surrogate Pair code point.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testValidUtf8File(self):
        self.CheckFile(encoding='utf_8', shouldPass=True)

    def testValidUtf8FileWithBom(self):
        #
        # Same test as testValidUtf8File, but add the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

        self.CheckFile(encoding=None, shouldPass=True, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # A code point above 0xFFFF, stored in a UTF-16 encoded file,
        # must be rejected.
        #
        # This method used to be named test32bitUnicodeCharInUtf8File,
        # the same as the UTF-8 test below, so it was silently replaced
        # by that later definition and never executed.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        #
        # A code point above 0xFFFF, stored in a UTF-8 encoded file,
        # must be rejected.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

# Hand this module's namespace to TestTools.MakeTheTestSuite; the result is
# called below to obtain the suite to run.
TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    # Executed directly as a script: run the suite with the text runner.
    runner = unittest.TextTestRunner()
    runner.run(TheTestSuite())
initial commit Clover sources 5061 and modules from EDK2 latest with legacy codes from UDK2018 Signed-off-by: Sergey Isakov <isakov-sl@bk.ru> 2019-09-03 11:58:42 +02:00			`## @file`
			`# Unit tests for AutoGen.UniClassObject`
			`#`
			`# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>`
			`#`
			`# SPDX-License-Identifier: BSD-2-Clause-Patent`
			`#`

			`##`
			`# Import Modules`
			`#`
			`import os`
			`import unittest`

			`import codecs`

			`import TestTools`

			`from Common.Misc import PathClass`
			`import AutoGen.UniClassObject as BtUni`

			`from Common import EdkLogger`
			`EdkLogger.InitializeForUnitTest()`

			`class Tests(TestTools.BaseToolsTest):`

			`SampleData = u'''`
			`#langdef en-US "English"`
			`#string STR_A #language en-US "STR_A for en-US"`
			`'''`

			`def EncodeToFile(self, encoding, string=None):`
			`if string is None:`
			`string = self.SampleData`
			`if encoding is not None:`
			`data = codecs.encode(string, encoding)`
			`else:`
			`data = string`
			`path = 'input.uni'`
			`self.WriteTmpFile(path, data)`
			`return PathClass(self.GetTmpFilePath(path))`

			`def ErrorFailure(self, error, encoding, shouldPass):`
			`msg = error + ' should '`
			`if shouldPass:`
			`msg += 'not '`
			`msg += 'be generated for '`
			`msg += '%s data in a .uni file' % encoding`
			`self.fail(msg)`

			`def UnicodeErrorFailure(self, encoding, shouldPass):`
			`self.ErrorFailure('UnicodeError', encoding, shouldPass)`

			`def EdkErrorFailure(self, encoding, shouldPass):`
			`self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)`

			`def CheckFile(self, encoding, shouldPass, string=None):`
			`path = self.EncodeToFile(encoding, string)`
			`try:`
			`BtUni.UniFileClassObject([path])`
			`if shouldPass:`
			`return`
			`except UnicodeError:`
			`if not shouldPass:`
			`return`
			`else:`
			`self.UnicodeErrorFailure(encoding, shouldPass)`
			`except EdkLogger.FatalError:`
			`if not shouldPass:`
			`return`
			`else:`
			`self.EdkErrorFailure(encoding, shouldPass)`
			`except Exception:`
			`pass`

			`self.EdkErrorFailure(encoding, shouldPass)`

			`def testUtf16InUniFile(self):`
			`self.CheckFile('utf_16', shouldPass=True)`

			`def testSupplementaryPlaneUnicodeCharInUtf16File(self):`
			`#`
			`# Supplementary Plane characters can exist in UTF-16 files,`
			`# but they are not valid UCS-2 characters.`
			`#`
			`# This test makes sure that BaseTools rejects these characters`
			`# if seen in a .uni file.`
			`#`
			`data = u'''`
			`#langdef en-US "English"`
			`#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"`
			`'''`

			`self.CheckFile('utf_16', shouldPass=False, string=data)`

			`def testSurrogatePairUnicodeCharInUtf16File(self):`
			`#`
			`# Surrogate Pair code points are used in UTF-16 files to`
			`# encode the Supplementary Plane characters. But, a Surrogate`
			`# Pair code point which is not followed by another Surrogate`
			`# Pair code point might be interpreted as a single code point`
			`# with the Surrogate Pair code point.`
			`#`
			`# This test makes sure that BaseTools rejects these characters`
			`# if seen in a .uni file.`
			`#`
			`data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '`

			`self.CheckFile(encoding=None, shouldPass=False, string=data)`

			`def testValidUtf8File(self):`
			`self.CheckFile(encoding='utf_8', shouldPass=True)`

			`def testValidUtf8FileWithBom(self):`
			`#`
			`# Same test as testValidUtf8File, but add the UTF-8 BOM`
			`#`
			`data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')`

			`self.CheckFile(encoding=None, shouldPass=True, string=data)`

			`def test32bitUnicodeCharInUtf8File(self):`
			`data = u'''`
			`#langdef en-US "English"`
			`#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"`
			`'''`

			`self.CheckFile('utf_16', shouldPass=False, string=data)`

			`def test32bitUnicodeCharInUtf8File(self):`
			`data = u'''`
			`#langdef en-US "English"`
			`#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"`
			`'''`

			`self.CheckFile('utf_8', shouldPass=False, string=data)`

			`def test32bitUnicodeCharInUtf8Comment(self):`
			`data = u'''`
			`// Even in comments, we reject non-UCS-2 chars: \U00010300`
			`#langdef en-US "English"`
			`#string STR_A #language en-US "A"`
			`'''`

			`self.CheckFile('utf_8', shouldPass=False, string=data)`

			`def testSurrogatePairUnicodeCharInUtf8File(self):`
			`#`
			`# Surrogate Pair code points are used in UTF-16 files to`
			`# encode the Supplementary Plane characters. In UTF-8, it is`
			`# trivial to encode these code points, but they are not valid`
			`# code points for characters, since they are reserved for the`
			`# UTF-16 Surrogate Pairs.`
			`#`
			`# This test makes sure that BaseTools rejects these characters`
			`# if seen in a .uni file.`
			`#`
			`data = b'\xed\xa0\x81'`

			`self.CheckFile(encoding=None, shouldPass=False, string=data)`

			`def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):`
			`#`
			`# Same test as testSurrogatePairUnicodeCharInUtf8File, but add`
			`# the UTF-8 BOM`
			`#`
			`data = codecs.BOM_UTF8 + b'\xed\xa0\x81'`

			`self.CheckFile(encoding=None, shouldPass=False, string=data)`

			`TheTestSuite = TestTools.MakeTheTestSuite(locals())`

			`if __name__ == '__main__':`
			`allTests = TheTestSuite()`
			`unittest.TextTestRunner().run(allTests)`