| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098 |
- <?php
- namespace PhpXmlRpc\Helper;
- use PhpXmlRpc\PhpXmlRpc;
- use PhpXmlRpc\Traits\DeprecationLogger;
- use PhpXmlRpc\Value;
- /**
- * Deals with parsing the XML.
- * @see http://xmlrpc.com/spec.md
- *
- * @todo implement an interface to allow for alternative implementations
- * - make access to $_xh protected, return more high-level data structures
- * - move the private parts of $_xh to the internal-use parsing-options config
- * - add parseRequest, parseResponse, parseValue methods
- * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding
- * while parsing, which is faster than doing it later by going over the rebuilt data structure
- * @todo rename? This is an xml-rpc parser, not a generic xml parser...
- *
- * @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC
- * @property int $accept deprecated - (protected) access left in purely for BC
- */
- class XMLParser
- {
- use DeprecationLogger;
- const RETURN_XMLRPCVALS = 'xmlrpcvals';
- const RETURN_EPIVALS = 'epivals';
- const RETURN_PHP = 'phpvals';
- const ACCEPT_REQUEST = 1;
- const ACCEPT_RESPONSE = 2;
- const ACCEPT_VALUE = 4;
- const ACCEPT_FAULT = 8;
- /**
- * @var int
- * The max length beyond which data will get truncated in error messages
- */
- protected $maxLogValueLength = 100;
- /**
- * @var array
- * Used to store state during parsing and to pass parsing results to callers.
- * Quick explanation of components:
- * private:
- * ac - used to accumulate values
- * stack - array with genealogy of xml elements names, used to validate nesting of xml-rpc elements
- * valuestack - array used for parsing arrays and structs
- * lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
- * (values: 0=not looking, 1=looking, 3=found)
- * public:
- * isf - used to indicate an xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3)
- * isf_reason - used for storing xml-rpc response fault string
- * value - used to store the value in responses
- * method - used to store method name in requests
- * params - used to store parameters in requests
- * pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
- * rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode)
- */
- protected $_xh = array(
- 'ac' => '',
- 'stack' => array(),
- 'valuestack' => array(),
- 'lv' => 0,
- 'isf' => 0,
- 'isf_reason' => '',
- 'value' => null,
- 'method' => false,
- 'params' => array(),
- 'pt' => array(),
- 'rt' => '',
- );
- /**
- * @var array[]
- */
- protected $xmlrpc_valid_parents = array(
- 'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
- 'BOOLEAN' => array('VALUE'),
- 'I4' => array('VALUE'),
- 'I8' => array('VALUE'),
- 'EX:I8' => array('VALUE'),
- 'INT' => array('VALUE'),
- 'STRING' => array('VALUE'),
- 'DOUBLE' => array('VALUE'),
- 'DATETIME.ISO8601' => array('VALUE'),
- 'BASE64' => array('VALUE'),
- 'MEMBER' => array('STRUCT'),
- 'NAME' => array('MEMBER'),
- 'DATA' => array('ARRAY'),
- 'ARRAY' => array('VALUE'),
- 'STRUCT' => array('VALUE'),
- 'PARAM' => array('PARAMS'),
- 'METHODNAME' => array('METHODCALL'),
- 'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
- 'FAULT' => array('METHODRESPONSE'),
- 'NIL' => array('VALUE'), // only used when extension activated
- 'EX:NIL' => array('VALUE'), // only used when extension activated
- );
- /** @var array $parsing_options */
- protected $parsing_options = array();
- /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
- //protected $accept = 3;
- /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
- protected $maxChunkLength = 4194304;
- /** @var array
- * Used keys: accept, target_charset, methodname_callback, plus the ones set here.
- * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not
- * the element handler methods
- */
- protected $current_parsing_options = array(
- 'xmlrpc_null_extension' => false,
- 'xmlrpc_return_datetimes' => false,
- 'xmlrpc_reject_invalid_values' => false
- );
- /**
- * @param array $options integer keys: options passed to the inner xml parser
- * string keys:
- * - target_charset (string)
- * - methodname_callback (callable)
- * - xmlrpc_null_extension (bool)
- * - xmlrpc_return_datetimes (bool)
- * - xmlrpc_reject_invalid_values (bool)
- */
public function __construct(array $options = array())
{
    // Stored as received: the split between xml-parser options (integer keys) and xml-rpc options (string keys),
    // as well as their validation, is carried out by parse() every time it is called
    $this->parsing_options = $options;
}
- /**
- * Parses an xml-rpc xml string. Results of the parsing are found in $this->_xh.
- * Logs to the error log any issues which do not cause the parsing to fail.
- *
- * @param string $data
- * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS
- * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE
- * @param array $options integer-key options are passed to the xml parser, string-key options are used independently.
- * These options are added to options received in the constructor.
- * Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values
- * are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used
- * @return array see the definition of $this->_xh for the meaning of the results
- * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
- *
- * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options
- * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so
- * that parsing will be completely independent of global state. Note that it might incur a small perf hit...
- */
public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array())
{
    // Start from a clean slate, so that no state from a previous parse() call can leak into this one
    $this->_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'lv' => 0,
        'isf' => 0,
        'isf_reason' => '',
        'value' => null,
        'method' => false, // so we can check later if we got a methodname or not
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    $len = strlen($data);

    // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
    if ($len == 0) {
        $this->_xh['isf'] = 3;
        $this->_xh['isf_reason'] = 'XML error 5: empty document';
        return $this->_xh;
    }

    $this->current_parsing_options = array('accept' => $accept);

    // Options passed to this call take precedence over the ones received in the constructor
    $mergedOptions = $this->parsing_options;
    foreach ($options as $key => $val) {
        $mergedOptions[$key] = $val;
    }

    // String-key options are consumed here; whatever is left in $mergedOptions afterwards is handed over to the
    // xml parser
    foreach ($mergedOptions as $key => $val) {
        // q: can php be built without ctype? should we use a regexp?
        if (is_string($key) && !ctype_digit($key)) {
            /// @todo on invalid options, throw/error-out instead of logging an error message?
            switch ($key) {
                case 'target_charset':
                    if (function_exists('mb_convert_encoding')) {
                        $this->current_parsing_options['target_charset'] = $val;
                    } else {
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring");
                    }
                    break;

                case 'methodname_callback':
                    if (is_callable($val)) {
                        $this->current_parsing_options['methodname_callback'] = $val;
                    } else {
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable");
                    }
                    break;

                case 'xmlrpc_null_extension':
                case 'xmlrpc_return_datetimes':
                case 'xmlrpc_reject_invalid_values':
                    $this->current_parsing_options[$key] = $val;
                    break;

                default:
                    $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key");
            }
            unset($mergedOptions[$key]);
        }
    }

    // Options not set explicitly default to the library-wide settings
    if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) {
        $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension;
    }
    if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) {
        $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes;
    }
    if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) {
        $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values;
    }

    // NB: we use '' instead of null to force charset detection from the xml declaration
    $parser = xml_parser_create('');

    foreach ($mergedOptions as $key => $val) {
        xml_parser_set_option($parser, $key, $val);
    }

    // always set this, in case someone tries to disable it via options...
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);

    // Pick the handler for closing tags based on the desired return type.
    /// @todo log an error / throw / error-out on unsupported return type
    switch ($returnType) {
        case self::RETURN_PHP:
            $endHandler = 'xmlrpc_ee_fast';
            break;
        case self::RETURN_EPIVALS:
            $endHandler = 'xmlrpc_ee_epi';
            break;
        case self::RETURN_XMLRPCVALS:
        default:
            $endHandler = 'xmlrpc_ee';
    }

    // NB: handlers are registered as `array($object, 'method')` callables instead of calling xml_set_object() and
    // passing bare method names, as the latter is deprecated as of php 8.4. The callable form works on all php versions
    xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, $endHandler));
    xml_set_character_data_handler($parser, array($this, 'xmlrpc_cd'));
    xml_set_default_handler($parser, array($this, 'xmlrpc_dh'));

    // Anything thrown while parsing (f.e. by the methodname callback) is kept aside, and re-thrown as-is once
    // cleanup is done. This gives us a single cleanup path without requiring a `finally` clause
    $thrown = null;
    try {
        // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
        for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
            $chunk = substr($data, $offset, $this->maxChunkLength);
            // the 3rd argument tells the xml parser whether this is the last chunk
            $isFinal = ($offset + $this->maxChunkLength >= $len);
            // error handling: xml not well formed
            if (!xml_parse($parser, $chunk, $isFinal)) {
                $errCode = xml_get_error_code($parser);
                $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
                    xml_get_current_line_number($parser), xml_get_current_column_number($parser));
                $this->_xh['isf'] = 3;
                $this->_xh['isf_reason'] = $errStr;
            }
            // no need to parse further if we already have a fatal error
            if ($this->_xh['isf'] >= 2) {
                break;
            }
        }
    } catch (\Exception $e) {
        /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
        $thrown = $e;
    } catch (\Error $e) {
        /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
        $thrown = $e;
    }

    // xml_parser_free() has no effect on php 8.0 and later, where parsers are objects released automatically;
    // it is only needed to release the parser resource on older php versions
    if (PHP_VERSION_ID < 80000) {
        xml_parser_free($parser);
    }
    $this->current_parsing_options = array();

    if ($thrown !== null) {
        throw $thrown;
    }

    return $this->_xh;
}
- /**
- * xml parser handler function for opening element tags.
- * @internal
- *
- * @param resource $parser
- * @param string $name
- * @param $attrs
- * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
- * @return void
- *
- * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
- * and remove the checking for $this->_xh['isf'] >= 2 everywhere
- */
public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
{
    // if invalid xml-rpc already detected, skip all processing
    if ($this->_xh['isf'] >= 2) {
        return;
    }

    // check for correct element nesting
    if (count($this->_xh['stack']) == 0) {
        // top level element: which ones are allowed depends on the `accept` bitmask
        /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
        ///       there is only a single top level element in xml anyway

        // BC
        if ($acceptSingleVals === false) {
            // 'accept' is set by parse(). In case it is missing, fall back to the default value of parse()'s
            // $accept argument (requests and responses) instead of triggering an undefined-index notice and
            // rejecting every document
            $accept = isset($this->current_parsing_options['accept']) ?
                $this->current_parsing_options['accept'] : (self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE);
        } else {
            $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated');
            $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
        }
        if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
            ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
            ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) ||
            ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) {
            $this->_xh['rt'] = strtolower($name);
        } else {
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;

            return;
        }
    } else {
        // not top level element: see if parent is OK
        $parent = end($this->_xh['stack']);
        // NB: both the element names on the stack and the ones in the valid-parents map are strings, so a strict
        // comparison is safe
        if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name], true)) {
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

            return;
        }
    }

    switch ($name) {
        // optimize for speed switch cases: most common cases first
        case 'VALUE':
            /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
            $this->_xh['vt'] = 'value'; // indicator: no value found yet
            $this->_xh['ac'] = '';
            $this->_xh['lv'] = 1;
            $this->_xh['php_class'] = null;
            break;

        case 'I8':
        case 'EX:I8':
            if (PHP_INT_SIZE === 4) {
                // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                return;
            }
            // fall through voluntarily

        case 'I4':
        case 'INT':
        case 'STRING':
        case 'BOOLEAN':
        case 'DOUBLE':
        case 'DATETIME.ISO8601':
        case 'BASE64':
            if ($this->_xh['vt'] != 'value') {
                // two data elements inside a value: an error occurred!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                return;
            }
            $this->_xh['ac'] = ''; // reset the accumulator
            break;

        case 'STRUCT':
        case 'ARRAY':
            if ($this->_xh['vt'] != 'value') {
                // two data elements inside a value: an error occurred!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                return;
            }
            // create an empty array to hold child values, and push it onto appropriate stack
            $curVal = array(
                'values' => array(),
                'type' => $name,
            );
            // check for out-of-band information to rebuild php objs and, in case it is found, save it
            // (isset() never raises errors on missing keys, no need for error suppression)
            if (isset($attrs['PHP_CLASS'])) {
                $curVal['php_class'] = $attrs['PHP_CLASS'];
            }
            $this->_xh['valuestack'][] = $curVal;
            $this->_xh['vt'] = 'data'; // be prepared for a data element next
            break;

        case 'DATA':
            // 'vt' is set to 'data' when the ARRAY element is opened, and reset when a DATA element is closed
            if ($this->_xh['vt'] != 'data') {
                // two data elements inside an array: an error occurred!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found two data elements inside an array element";

                return;
            }
            break;

        case 'METHODCALL':
        case 'METHODRESPONSE':
        case 'PARAMS':
            // valid elements that add little to processing
            break;

        case 'METHODNAME':
        case 'NAME':
            /// @todo we could check for 2 NAME elements inside a MEMBER element
            $this->_xh['ac'] = '';
            break;

        case 'FAULT':
            $this->_xh['isf'] = 1;
            break;

        case 'MEMBER':
            // set member name to null, in case we do not find in the xml later on
            $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = null;
            // Drop through intentionally

        case 'PARAM':
            // clear value type, so we can check later if no value has been passed for this param/member
            $this->_xh['vt'] = null;
            break;

        case 'NIL':
        case 'EX:NIL':
            if ($this->current_parsing_options['xmlrpc_null_extension']) {
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs
                $this->_xh['ac'] = '';
            } else {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension';

                return;
            }
            break;

        default:
            // INVALID ELEMENT: RAISE ISF so that it is later recognized
            /// @todo feature creep = allow a callback instead
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";

            return;
    }

    // Save current element name to stack, to validate nesting
    $this->_xh['stack'][] = $name;

    // Any element other than VALUE means we are not "looking for a value" any more
    /// @todo optimization creep: move this inside the big switch() above
    if ($name != 'VALUE') {
        $this->_xh['lv'] = 0;
    }
}
- /**
- * xml parser handler function for close element tags.
- * @internal
- *
- * @param resource $parser
- * @param string $name
- * @param int $rebuildXmlrpcvals >0 for rebuilding xmlrpcvals, 0 for rebuilding php values, <0 for xmlrpc-extension compatibility
- * @return void
- * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
- *
- * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
- * and remove the checking for $this->_xh['isf'] >= 2 everywhere
- */
public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
{
    // if invalid xml-rpc already detected, skip all processing
    if ($this->_xh['isf'] >= 2) {
        return;
    }

    // pop this element name from the stack
    // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check that $name matches
    // the popped element. We also checked for proper nesting at start of elements...
    array_pop($this->_xh['stack']);

    switch ($name) {
        case 'VALUE':
            // If no scalar was inside <VALUE></VALUE>, it was a string value
            if ($this->_xh['vt'] == 'value') {
                $this->_xh['value'] = $this->_xh['ac'];
                $this->_xh['vt'] = Value::$xmlrpcString;
            }

            // in case there is charset conversion required, do it here, to catch both cases of string values
            if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) {
                $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8');
            }

            if ($rebuildXmlrpcvals > 0) {
                // build the xml-rpc val out of the data received, and substitute it
                $temp = new Value($this->_xh['value'], $this->_xh['vt']);
                // in case we got info about underlying php class, save it in the object we're rebuilding
                if (isset($this->_xh['php_class'])) {
                    $temp->_php_class = $this->_xh['php_class'];
                }
                $this->_xh['value'] = $temp;
            } elseif ($rebuildXmlrpcvals < 0) {
                // xmlrpc-extension emulation: only datetime and base64 values get wrapped in an object
                if ($this->_xh['vt'] == Value::$xmlrpcDateTime) {
                    $this->_xh['value'] = (object)array(
                        'xmlrpc_type' => 'datetime',
                        'scalar' => $this->_xh['value'],
                        'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value'])
                    );
                } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) {
                    $this->_xh['value'] = (object)array(
                        'xmlrpc_type' => 'base64',
                        'scalar' => $this->_xh['value']
                    );
                }
            } else {
                /// @todo this should handle php-serialized objects, since std deserializing is done
                ///       by php_xmlrpc_decode, which we will not be calling...
                //if (isset($this->_xh['php_class'])) {
                //}
            }

            // check if we are inside an array or struct:
            // if value just built is inside an array, let's move it into array on the stack
            $vscount = count($this->_xh['valuestack']);
            if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
            }
            break;

        case 'STRING':
            $this->_xh['vt'] = Value::$xmlrpcString;
            $this->_xh['lv'] = 3; // indicate we've found a value
            $this->_xh['value'] = $this->_xh['ac'];
            break;

        case 'BOOLEAN':
            $this->_xh['vt'] = Value::$xmlrpcBoolean;
            $this->_xh['lv'] = 3; // indicate we've found a value
            // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted,
            // even though the spec never mentions them (see e.g. Blogger api docs)
            // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here
            // Note the non-strict type check: it will allow ' 1 '
            /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime.
            ///       Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and
            ///       to changes in the way php compares strings (since 8.0, leading and trailing newlines are
            ///       accepted when deciding if a string numeric...)
            if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) {
                $this->_xh['value'] = true;
            } else {
                // log if receiving something strange, even though we set the value to false anyway
                /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL
                if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) {
                    if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                }
                $this->_xh['value'] = false;
            }
            break;

        case 'EX:I8':
            $name = 'i8';
            // fall through voluntarily
        case 'I4':
        case 'I8':
        case 'INT':
            // NB: we build the Value object with the original xml element name found, except for ex:i8. The
            // `Value::scalarTyp()` function will do some normalization of the data
            $this->_xh['vt'] = strtolower($name);
            $this->_xh['lv'] = 3; // indicate we've found a value
            if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('non numeric data received in INT value: ' .
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }
                /// @todo: find a better way of reporting an error value than this! Use NaN?
                $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
            } else {
                // it's ok, add it on
                $this->_xh['value'] = (int)$this->_xh['ac'];
            }
            break;

        case 'DOUBLE':
            $this->_xh['vt'] = Value::$xmlrpcDouble;
            $this->_xh['lv'] = 3; // indicate we've found a value
            if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' .
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }

                $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
            } else {
                // it's ok, add it on
                // NB: we use the canonical `(float)` cast name, as the `(double)` alias is deprecated in recent php versions
                $this->_xh['value'] = (float)$this->_xh['ac'];
            }
            break;

        case 'DATETIME.ISO8601':
            $this->_xh['vt'] = Value::$xmlrpcDateTime;
            $this->_xh['lv'] = 3; // indicate we've found a value
            if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('invalid data received in DATETIME value: ' .
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }
            }
            if ($this->current_parsing_options['xmlrpc_return_datetimes']) {
                try {
                    $this->_xh['value'] = new \DateTime($this->_xh['ac']);

                // the default regex used to validate the date string a few lines above should make this case impossible,
                // but one never knows...
                } catch(\Exception $e) {
                    // what to do? We can not guarantee that a valid date can be created. We return null...
                    if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' .
                        $e->getMessage(), __METHOD__)) {
                        return;
                    }
                    // ...and we have to set it explicitly: otherwise the value of the element parsed before this
                    // one would be left in place, and be mistaken for the value of this datetime
                    $this->_xh['value'] = null;
                }
            } else {
                $this->_xh['value'] = $this->_xh['ac'];
            }
            break;

        case 'BASE64':
            $this->_xh['vt'] = Value::$xmlrpcBase64;
            $this->_xh['lv'] = 3; // indicate we've found a value
            if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) {
                // strict decoding: returns false on characters outside the base64 alphabet
                $v = base64_decode($this->_xh['ac'], true);
                if ($v === false) {
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']);
                    return;
                }
            } else {
                $v = base64_decode($this->_xh['ac']);
                if ($v === '' && $this->_xh['ac'] !== '') {
                    // only the empty string should decode to the empty string
                    $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' .
                        $this->truncateValueForLog($this->_xh['ac']));
                }
            }
            $this->_xh['value'] = $v;
            break;

        case 'NAME':
            // store the member name in the struct currently being built
            $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
            break;

        case 'MEMBER':
            // add to array in the stack the last element built, unless no VALUE or no NAME were found
            if ($this->_xh['vt']) {
                $vscount = count($this->_xh['valuestack']);
                if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) {
                    if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) {
                        return;
                    }
                    $this->_xh['valuestack'][$vscount - 1]['name'] = '';
                }
                $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
            } else {
                if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) {
                    return;
                }
            }
            break;

        case 'DATA':
            $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
            break;

        case 'STRUCT':
        case 'ARRAY':
            // fetch out of stack array of values, and promote it to current value
            $currVal = array_pop($this->_xh['valuestack']);
            $this->_xh['value'] = $currVal['values'];
            $this->_xh['vt'] = strtolower($name);
            if (isset($currVal['php_class'])) {
                $this->_xh['php_class'] = $currVal['php_class'];
            }
            break;

        case 'PARAM':
            // add to array of params the current value, unless no VALUE was found
            /// @todo should we also check if there were two VALUE inside the PARAM?
            if ($this->_xh['vt']) {
                $this->_xh['params'][] = $this->_xh['value'];
                $this->_xh['pt'][] = $this->_xh['vt'];
            } else {
                if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) {
                    return;
                }
            }
            break;

        case 'METHODNAME':
            if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('invalid data received in METHODNAME: '.
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }
            }
            $methodName = trim($this->_xh['ac']);
            $this->_xh['method'] = $methodName;
            // we allow the callback to f.e. give us back a mangled method name by manipulating $this
            if (isset($this->current_parsing_options['methodname_callback'])) {
                call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser);
            }
            break;

        case 'NIL':
        case 'EX:NIL':
            // NB: if NIL support is not enabled, parsing stops at element start, so there is no need to check
            // for the 'xmlrpc_null_extension' option here
            $this->_xh['vt'] = 'null';
            $this->_xh['value'] = null;
            $this->_xh['lv'] = 3;
            break;

        /// @todo add extra checking:
        ///       - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT
        ///       - FAULT should contain a single struct with the 2 expected members (check their name and type)
        ///       - METHODCALL should contain a methodname
        case 'PARAMS':
        case 'FAULT':
        case 'METHODCALL':
        case 'METHODRESPONSE':
            break;

        default:
            // End of INVALID ELEMENT
            // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se,
            // $this->_xh['isf'] is set to 2...
            break;
    }
}
- /**
- * Used in decoding xml-rpc requests/responses without rebuilding xml-rpc Values.
- * @internal
- *
- * @param resource $parser
- * @param string $name
- * @return void
- */
public function xmlrpc_ee_fast($parser, $name)
{
    // 0: have the generic close-tag handler leave plain php values in place, without wrapping them
    $rebuildMode = 0;

    $this->xmlrpc_ee($parser, $name, $rebuildMode);
}
- /**
- * Used in decoding xml-rpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).
- * @internal
- *
- * @param resource $parser
- * @param string $name
- * @return void
- */
public function xmlrpc_ee_epi($parser, $name)
{
    // negative value: have the generic close-tag handler wrap only datetime and base64 values into objects
    $rebuildMode = -1;

    $this->xmlrpc_ee($parser, $name, $rebuildMode);
}
- /**
- * xml parser handler function for character data.
- * @internal
- *
- * @param resource $parser
- * @param string $data
- * @return void
- */
public function xmlrpc_cd($parser, $data)
{
    // Character data gets appended to the accumulator only when both conditions hold:
    // - no fatal fault (isf >= 2) has been detected so far
    // - "lookforvalue" is not 3, as that means that an entire value was found already, and any further
    //   character data has to be discarded
    $faultDetected = ($this->_xh['isf'] >= 2);

    if (!$faultDetected && $this->_xh['lv'] != 3) {
        $this->_xh['ac'] .= $data;
    }
}
- /**
- * xml parser handler function for 'other stuff', i.e. not char data or element start/end tag.
- * In fact, it only gets called on unknown entities...
- * @internal
- *
- * @param $parser
- * @param string $data
- * @return void
- */
public function xmlrpc_dh($parser, $data)
{
    // nothing to do if a fatal fault was detected already
    if ($this->_xh['isf'] >= 2) {
        return;
    }

    // Only data which looks like an entity reference, i.e. `&...;`, is kept: it gets appended as-is to the accumulator
    $firstChar = substr($data, 0, 1);
    $lastChar = substr($data, -1, 1);
    if ($firstChar == '&' && $lastChar == ';') {
        $this->_xh['ac'] .= $data;
    }
}
/**
 * Helper function which guesses the charset encoding of an XML chunk received over HTTP.
 *
 * NB: according to the spec (RFC 3023), when text/xml content is received over HTTP without a charset, we SHOULD
 * assume it is strictly US-ASCII. We try instead to be more tolerant of non-conforming (legacy?) clients/servers,
 * which will most probably be using UTF-8 anyway...
 *
 * The following sources are checked in order, the first one giving a result wins:
 * 1. the charset found in the http header
 * 2. a BOM at the start of the xml
 * 3. the encoding found in the xml declaration at the start of the xml
 * 4. the guess made by mb_detect_encoding(), when that function is available
 *
 * @param string $httpHeader the http Content-type header
 * @param string $xmlChunk xml content buffer
 * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension
 *                              is enabled). This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
 * @return string the encoding determined, in uppercase when it comes from the http header or the xml declaration.
 *                When checks 1 to 3 give no result: the value returned by mb_detect_encoding() if that function
 *                exists (with 'ASCII' changed into 'US-ASCII'; note that on detection failure this is whatever
 *                mb_detect_encoding() hands back, and not a charset name), PhpXmlRpc::$xmlrpc_defencoding otherwise
 *
 * @todo as of 2023, the relevant RFC for XML Media Types is now 7303, and for HTTP it is 9110. Check if the order of
 *       precedence implemented here is still correct
 * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of
 *       hand-detection!!!
 * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
 *       the method independent of global state
 */
public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
{
    // discussion: see http://www.yale.edu/pclt/encoding/

    // 1 - HTTP HEADERS
    // Details:
    //   LWS:           (\13\10)?( |\t)+
    //   token:         (any char but excluded stuff)+
    //   quoted string: " (any char but double quotes and control chars)* "
    //   header:        Content-type = ...; charset=value(; ...)*
    //   where value is of type token, no LWS allowed between 'charset' and value
    // Note: invalid chars in VALUE are not checked for
    // Note 2: the trimming below might remove whitespace/tabs which ought to be left in when the received charset
    //         is a quoted string. But nobody uses such charset names...
    /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
    $headerMatch = array();
    if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $headerMatch)) {
        $charset = trim($headerMatch[1], " \t\"");

        return strtoupper($charset);
    }

    // 2 - BOM: look at the first bytes of the data for a UTF-16 (or other) BOM pattern
    //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
    // NOTE: actually, according to the spec, even when a BOM is found and an encoding determined from it, we should
    //       check if there is an encoding specified in the xml declaration, and verify that the two match.
    /// @todo implement check as described above?
    /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases
    ///       WITH a BOM)
    // The order of the entries matters: the 4-byte patterns have to be tested before the 2-byte ones
    $bomPatterns = array(
        'UCS-4' => '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
        'UTF-16' => '/^(?:\xFE\xFF|\xFF\xFE)/',
        'UTF-8' => '/^(?:\xEF\xBB\xBF)/',
    );
    foreach ($bomPatterns as $charset => $bomPattern) {
        if (preg_match($bomPattern, $xmlChunk)) {
            return $charset;
        }
    }

    // 3 - XML DECLARATION
    /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that
    ///       case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6.
    ///       For lower versions, we could attempt usage of mb_ereg...
    // Details:
    //   SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
    //   EQ:    SPACE?=SPACE?             === [ \x9\xD\xA]*=[ \x9\xD\xA]*
    $declarationPattern = '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
        '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/";
    $declarationMatch = array();
    if (preg_match($declarationPattern, $xmlChunk, $declarationMatch)) {
        // the 2nd capture group holds the encoding name, quotes included: strip them
        $charset = substr($declarationMatch[2], 1, -1);

        return strtoupper($charset);
    }

    // 4 - GUESSWORK
    if (!function_exists('mb_detect_encoding')) {
        // No encoding specified: assume it is iso-8859-1, as per HTTP1.1?
        // Both RFC 2616 (HTTP 1.1) and RFC 1945 (HTTP 1.0) clearly state that for text/xxx content types
        // this should be the standard. And we should be getting text/xml as request and response.
        // BUT we have to be backward compatible with the lib, which always used UTF-8 as default. Moreover,
        // RFC 7231, which obsoletes the two RFC mentioned above, has changed the rules. It says:
        // "The default charset of ISO-8859-1 for text media types has been removed; the default is now whatever
        // the media type definition says."
        return PhpXmlRpc::$xmlrpc_defencoding;
    }

    // mbstring is available: let it do the guesswork, restricted to the preferred encodings if there are any
    if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
        $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
    }
    $detected = $encodingPrefs ? mb_detect_encoding($xmlChunk, $encodingPrefs) : mb_detect_encoding($xmlChunk);

    // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
    // IANA also likes better US-ASCII, so go with it
    if ($detected == 'ASCII') {
        $detected = 'US-' . $detected;
    }

    return $detected;
}
/**
 * Helper function: tells whether an xml chunk carries a charset declaration, be it as a BOM or within the
 * xml declaration. Both are only looked for at the very beginning of the chunk.
 *
 * @param string $xmlChunk
 * @return bool
 *
 * @todo rename to hasEncodingDeclaration
 */
public static function hasEncoding($xmlChunk)
{
    $declarationPatterns = array(
        // UTF-16 (or other) BOM patterns found in the first bytes of the data
        // (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
        '/^(?:\xFE\xFF|\xFF\xFE)/',
        '/^(?:\xEF\xBB\xBF)/',
        // encoding specified in the xml declaration
        // Details:
        //   SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        //   EQ:    SPACE?=SPACE?             === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
    );

    foreach ($declarationPatterns as $pattern) {
        if (preg_match($pattern, $xmlChunk)) {
            return true;
        }
    }

    return false;
}
/**
 * Deals with an error found while parsing, according to the 'xmlrpc_reject_invalid_values' parsing option:
 * - option on: the error is recorded as a fault ('isf' set to 2, 'isf_reason' set to the message with its first
 *   character uppercased) and nothing is logged
 * - option off: the error is only sent to the logger, at error level
 *
 * @param string $message
 * @param string $method method/file/line info. When not empty, it is prepended to the logged message
 * @return bool false if the caller has to stop parsing
 */
protected function handleParsingError($message, $method = '')
{
    if (!$this->current_parsing_options['xmlrpc_reject_invalid_values']) {
        // lenient mode: log and let the caller carry on
        $prefix = 'XML-RPC: ';
        if ($method != '') {
            $prefix .= $method . ': ';
        }
        $this->getLogger()->error($prefix . $message);

        return true;
    }

    // strict mode: flag the fault and ask the caller to stop
    $this->_xh['isf'] = 2;
    $this->_xh['isf_reason'] = ucfirst($message);

    return false;
}
/**
 * Truncates unsafe data before it gets logged.
 * Data longer than $this->maxLogValueLength bytes is cut down to its first maxLogValueLength - 3 bytes, with
 * '...' appended; shorter data is returned untouched.
 *
 * @param string $data
 * @return string
 */
protected function truncateValueForLog($data)
{
    $limit = $this->maxLogValueLength;

    return strlen($data) > $limit
        ? substr($data, 0, $limit - 3) . '...'
        : $data;
}
- // *** BC layer ***
/**
 * xml parser handler function for opening element tags, used in decoding xml chunks that might represent single
 * xml-rpc values as well as requests, responses.
 * It logs a deprecation notice, then forwards to xmlrpc_se() with true as the fourth argument.
 * @deprecated
 *
 * @param resource $parser
 * @param $name
 * @param $attrs
 * @return void
 */
public function xmlrpc_se_any($parser, $name, $attrs)
{
    // NB: as this runs on every call, the log will get spammed if this method is in use...
    $notice = 'Method ' . __METHOD__ . ' is deprecated';
    $this->logDeprecation($notice);

    $this->xmlrpc_se($parser, $name, $attrs, true);
}
/**
 * Magic getter, part of the BC layer: keeps the properties `_xh` and `xmlrpc_valid_parents` readable from the
 * outside, logging a deprecation notice on every access. The value is returned by reference.
 * Reading any other property triggers an E_USER_WARNING and yields null.
 *
 * @param string $name
 * @return mixed
 */
public function &__get($name)
{
    // loose comparisons are used on purpose, matching what a switch statement does
    if ($name == '_xh' || $name == 'xmlrpc_valid_parents') {
        $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');

        return $this->$name;
    }

    /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    $caller = $trace[0];
    trigger_error('Undefined property via __get(): ' . $name . ' in ' . $caller['file'] . ' on line ' . $caller['line'], E_USER_WARNING);

    // a variable is needed, since the method returns by reference
    $result = null;

    return $result;
}
/**
 * Magic setter, part of the BC layer. A deprecation notice is logged for every supported property:
 * - `accept`: the value is stored in $this->current_parsing_options['accept']
 * - `_xh`, `xmlrpc_valid_parents`: the value is assigned to the property of the same name
 * Setting any other property triggers an E_USER_WARNING, and nothing gets stored.
 *
 * @param string $name
 * @param mixed $value
 * @return void
 */
public function __set($name, $value)
{
    // loose comparisons are used on purpose, matching what a switch statement does
    if ($name == 'accept') {
        // this should only ever be called by subclasses which overtook `parse()`
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->current_parsing_options['accept'] = $value;
    } elseif ($name == '_xh' || $name == 'xmlrpc_valid_parents') {
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->$name = $value;
    } else {
        /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
        $caller = $trace[0];
        trigger_error('Undefined property via __set(): ' . $name . ' in ' . $caller['file'] . ' on line ' . $caller['line'], E_USER_WARNING);
    }
}
/**
 * Magic isset() support, part of the BC layer. A deprecation notice is logged for every supported property:
 * - `accept`: checks $this->current_parsing_options['accept']
 * - `_xh`, `xmlrpc_valid_parents`: checks the property of the same name
 * Any other property is reported as not set, without any notice or warning.
 *
 * @param string $name
 * @return bool
 */
public function __isset($name)
{
    // loose comparisons are used on purpose, matching what a switch statement does
    if ($name == 'accept') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->current_parsing_options['accept']);
    }

    if ($name == '_xh' || $name == 'xmlrpc_valid_parents') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->$name);
    }

    return false;
}
/**
 * Magic unset() support, part of the BC layer. A deprecation notice is logged for every supported property:
 * - `accept`: removes $this->current_parsing_options['accept']
 * - `_xh`, `xmlrpc_valid_parents`: unsets the property of the same name
 * Unsetting any other property triggers an E_USER_WARNING, and nothing gets removed.
 *
 * @param string $name
 * @return void
 */
public function __unset($name)
{
    // loose comparisons are used on purpose, matching what a switch statement does
    if ($name == 'accept') {
        // q: does this make sense at all?
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->current_parsing_options['accept']);
    } elseif ($name == '_xh' || $name == 'xmlrpc_valid_parents') {
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->$name);
    } else {
        /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
        $caller = $trace[0];
        trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $caller['file'] . ' on line ' . $caller['line'], E_USER_WARNING);
    }
}
- }
|