summaryrefslogtreecommitdiff
path: root/vendor/wikimedia/avro/lib/avro/schema.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/wikimedia/avro/lib/avro/schema.php')
-rw-r--r--vendor/wikimedia/avro/lib/avro/schema.php1457
1 files changed, 1457 insertions, 0 deletions
diff --git a/vendor/wikimedia/avro/lib/avro/schema.php b/vendor/wikimedia/avro/lib/avro/schema.php
new file mode 100644
index 00000000..3d7fbbb8
--- /dev/null
+++ b/vendor/wikimedia/avro/lib/avro/schema.php
@@ -0,0 +1,1457 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Avro Schema and Avro Schema support classes.
+ * @package Avro
+ */
+
+/** TODO
+ * - ARRAY have only type and item attributes (what about metadata?)
+ * - MAP keys are (assumed?) to be strings
+ * - FIXED size must be integer (must be positive? less than MAXINT?)
+ * - primitive type names cannot have a namespace (so throw an error? or ignore?)
+ * - schema may contain multiple definitions of a named schema
+ * if definitions are equivalent (?)
+ * - Cleanup default namespace and named schemata handling.
+ * - For one, it appears to be *too* global. According to the spec,
+ * we should only be referencing schemas that are named within the
+ * *enclosing* schema, so those in sibling schemas (say, unions or fields)
+ * shouldn't be referenced, if I understand the spec correctly.
+ * - Also, if a named schema is defined more than once in the same schema,
+ * it must have the same definition: so it appears we *do* need to keep
+ * track of named schemata globally as well. (And does this play well
+ * with the requirements regarding enclosing schema?)
+ * - default values for bytes and fixed fields are JSON strings,
+ * where unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255
+ * - make sure other default values for other schema are of appropriate type
+ * - Should AvroField really be an AvroSchema object? Avro Fields have a name
+ * attribute, but not a namespace attribute (and the name can't be namespace
+ * qualified). It also has additional attributes such as doc, which named schemas
+ * enum and record have (though not fixed schemas, which also have names), and
+ * fields also have default and order attributes, shared by no other schema type.
+ */
+
+/**
+ * Exception thrown when a JSON schema representation cannot be parsed
+ * into a valid Avro schema.
+ * @package Avro
+ */
+class AvroSchemaParseException extends AvroException {};
+
+/**
+ * @package Avro
+ */
+class AvroSchema
+{
+ /**
+ * @var int lower bound of integer values: -(1 << 31).
+ * Used by is_valid_datum() to range-check "int" data.
+ */
+ const INT_MIN_VALUE = -2147483648;
+
+ /**
+ * @var int upper bound of integer values: (1 << 31) - 1.
+ * Used by is_valid_datum() to range-check "int" data.
+ */
+ const INT_MAX_VALUE = 2147483647;
+
+ /**
+ * @var long lower bound of long values: -(1 << 63).
+ * NOTE(review): the literal 9223372036854775808 is larger than a 64-bit
+ * PHP_INT_MAX before the minus sign is applied, so this constant is
+ * presumably stored as a float rather than an int -- confirm.
+ */
+ const LONG_MIN_VALUE = -9223372036854775808;
+
+ /**
+ * @var long upper bound of long values: (1 << 63) - 1
+ */
+ const LONG_MAX_VALUE = 9223372036854775807;
+
+ /**
+ * @var string null schema type name
+ */
+ const NULL_TYPE = 'null';
+
+ /**
+ * @var string boolean schema type name
+ */
+ const BOOLEAN_TYPE = 'boolean';
+
+ /**
+ * int schema type value is a 32-bit signed int
+ * @var string int schema type name.
+ */
+ const INT_TYPE = 'int';
+
+ /**
+ * long schema type value is a 64-bit signed int
+ * @var string long schema type name
+ */
+ const LONG_TYPE = 'long';
+
+ /**
+ * float schema type value is a 32-bit IEEE 754 floating-point number
+ * @var string float schema type name
+ */
+ const FLOAT_TYPE = 'float';
+
+ /**
+ * double schema type value is a 64-bit IEEE 754 floating-point number
+ * @var string double schema type name
+ */
+ const DOUBLE_TYPE = 'double';
+
+ /**
+ * string schema type value is a Unicode character sequence
+ * @var string string schema type name
+ */
+ const STRING_TYPE = 'string';
+
+ /**
+ * bytes schema type value is a sequence of 8-bit unsigned bytes
+ * @var string bytes schema type name
+ */
+ const BYTES_TYPE = 'bytes';
+
+ // Complex Types
+ // Unnamed Schema (these carry no name/namespace of their own)
+ /**
+ * @var string array schema type name
+ */
+ const ARRAY_SCHEMA = 'array';
+
+ /**
+ * @var string map schema type name
+ */
+ const MAP_SCHEMA = 'map';
+
+ /**
+ * @var string union schema type name
+ */
+ const UNION_SCHEMA = 'union';
+
+ /**
+ * Unions of error schemas are used by Avro messages
+ * @var string error_union schema type name
+ */
+ const ERROR_UNION_SCHEMA = 'error_union';
+
+ // Named Schema (see self::$named_types)
+
+ /**
+ * @var string enum schema type name
+ */
+ const ENUM_SCHEMA = 'enum';
+
+ /**
+ * @var string fixed schema type name
+ */
+ const FIXED_SCHEMA = 'fixed';
+
+ /**
+ * @var string record schema type name
+ */
+ const RECORD_SCHEMA = 'record';
+ // Other Schema
+
+ /**
+ * @var string error schema type name; parsed into an AvroRecordSchema
+ * exactly like a record (see real_parse()).
+ */
+ const ERROR_SCHEMA = 'error';
+
+ /**
+ * @var string request schema type name
+ */
+ const REQUEST_SCHEMA = 'request';
+
+
+ // Schema attribute names (keys of the JSON-decoded schema array)
+ /**
+ * @var string schema type name attribute name
+ */
+ const TYPE_ATTR = 'type';
+
+ /**
+ * @var string named schema name attribute name
+ */
+ const NAME_ATTR = 'name';
+
+ /**
+ * @var string named schema namespace attribute name
+ */
+ const NAMESPACE_ATTR = 'namespace';
+
+ /**
+ * @var string derived attribute: doesn't appear in schema
+ */
+ const FULLNAME_ATTR = 'fullname';
+
+ /**
+ * @var string fixed schema size attribute name
+ * (read by real_parse() when building an AvroFixedSchema)
+ */
+ const SIZE_ATTR = 'size';
+
+ /**
+ * @var string record fields attribute name
+ */
+ const FIELDS_ATTR = 'fields';
+
+ /**
+ * @var string array schema items attribute name
+ */
+ const ITEMS_ATTR = 'items';
+
+ /**
+ * @var string enum schema symbols attribute name
+ */
+ const SYMBOLS_ATTR = 'symbols';
+
+ /**
+ * @var string map schema values attribute name
+ */
+ const VALUES_ATTR = 'values';
+
+ /**
+ * @var string document string attribute name
+ */
+ const DOC_ATTR = 'doc';
+
+ /**
+ * @var array list of primitive schema type names;
+ * consulted by is_primitive_type()
+ */
+ private static $primitive_types = array(self::NULL_TYPE, self::BOOLEAN_TYPE,
+ self::STRING_TYPE, self::BYTES_TYPE,
+ self::INT_TYPE, self::LONG_TYPE,
+ self::FLOAT_TYPE, self::DOUBLE_TYPE);
+
+ /**
+ * @var array list of named schema type names;
+ * consulted by is_named_type()
+ */
+ private static $named_types = array(self::FIXED_SCHEMA, self::ENUM_SCHEMA,
+ self::RECORD_SCHEMA, self::ERROR_SCHEMA);
+
+ /**
+  * Tests whether a type name denotes a named schema type
+  * (one of the entries of self::$named_types).
+  *
+  * The comparison is strict: a non-string value decoded from JSON
+  * (for example true, which loosely equals every non-empty string)
+  * must never be mistaken for a type name.
+  * @param string $type a schema type name
+  * @returns boolean true if the given type name is a named schema type name
+  *                  and false otherwise.
+  */
+ public static function is_named_type($type)
+ {
+   return in_array($type, self::$named_types, true);
+ }
+
+ /**
+  * Tests whether a type name denotes a primitive schema type
+  * (one of the entries of self::$primitive_types).
+  *
+  * The comparison is strict so that a schema such as {"type": true}
+  * is rejected instead of being accepted as a primitive schema.
+  * @param string $type a schema type name
+  * @returns boolean true if the given type name is a primitive schema type
+  *                  name and false otherwise.
+  */
+ public static function is_primitive_type($type)
+ {
+   return in_array($type, self::$primitive_types, true);
+ }
+
+ /**
+  * Tests whether a type name is any schema type name known to this class:
+  * a primitive type, a named type, or one of the unnamed complex types
+  * (array, map, union, request, error_union).
+  *
+  * The comparison is strict so that non-string values never match.
+  * @param string $type a schema type name
+  * @returns boolean true if the given type name is a valid schema type
+  *                  name and false otherwise.
+  */
+ public static function is_valid_type($type)
+ {
+   if (self::is_primitive_type($type) || self::is_named_type($type))
+     return true;
+
+   $unnamed_complex_types = array(self::ARRAY_SCHEMA,
+                                  self::MAP_SCHEMA,
+                                  self::UNION_SCHEMA,
+                                  self::REQUEST_SCHEMA,
+                                  self::ERROR_UNION_SCHEMA);
+   return in_array($type, $unnamed_complex_types, true);
+ }
+
+ /**
+ * @var array list of names of reserved attributes, i.e. the schema
+ * attribute names defined by the *_ATTR constants of this class.
+ * NOTE(review): nothing in the visible part of this file reads this
+ * list -- confirm whether it is still needed.
+ */
+ private static $reserved_attrs = array(self::TYPE_ATTR,
+ self::NAME_ATTR,
+ self::NAMESPACE_ATTR,
+ self::FIELDS_ATTR,
+ self::ITEMS_ATTR,
+ self::SIZE_ATTR,
+ self::SYMBOLS_ATTR,
+ self::VALUES_ATTR);
+
+ /**
+  * Parses a JSON-encoded schema into an AvroSchema object.
+  *
+  * The JSON is decoded into associative arrays and handed to real_parse()
+  * together with a fresh, empty AvroNamedSchemata registry. Malformed JSON
+  * is reported explicitly rather than being passed on as a null schema.
+  * @param string $json JSON-encoded schema
+  * @uses self::real_parse()
+  * @returns AvroSchema
+  * @throws AvroSchemaParseException if $json is not valid JSON or does not
+  *                                  describe a valid schema
+  */
+ public static function parse($json)
+ {
+   $avro = json_decode($json, true);
+
+   // json_decode() returns null both for the JSON literal null and on
+   // failure; only the latter sets json_last_error().
+   if (is_null($avro) && JSON_ERROR_NONE !== json_last_error())
+     throw new AvroSchemaParseException(
+       sprintf('Schema is not valid JSON (json_last_error code %d).',
+               json_last_error()));
+
+   $schemata = new AvroNamedSchemata();
+   return self::real_parse($avro, null, $schemata);
+ }
+
+ /**
+  * Builds an AvroSchema from a JSON-decoded schema definition.
+  *
+  * Accepted forms of $avro:
+  *  - a primitive type name string, e.g. "int";
+  *  - an associative array with a "type" attribute naming a primitive,
+  *    named (fixed, enum, record, error), array or map schema;
+  *  - a list without a "type" key, which is parsed as a union.
+  *
+  * @param mixed $avro JSON-decoded schema
+  * @param string $default_namespace namespace of enclosing schema
+  * @param AvroNamedSchemata &$schemata reference to named schemas; a new
+  *        registry is created (and assigned to the caller's variable)
+  *        when null is given
+  * @returns AvroSchema
+  * @throws AvroSchemaParseException
+  */
+ public static function real_parse($avro, $default_namespace=null, &$schemata=null)
+ {
+   if (is_null($schemata))
+     $schemata = new AvroNamedSchemata();
+
+   // Anything that is not an array can only be a primitive type name.
+   if (!is_array($avro))
+   {
+     if (self::is_primitive_type($avro))
+       return new AvroPrimitiveSchema($avro);
+
+     throw new AvroSchemaParseException(
+       sprintf('%s is not a schema we know about.',
+               print_r($avro, true)));
+   }
+
+   $type = AvroUtil::array_value($avro, self::TYPE_ATTR);
+
+   if (self::is_primitive_type($type))
+     return new AvroPrimitiveSchema($type);
+
+   if (self::is_named_type($type))
+   {
+     $name = AvroUtil::array_value($avro, self::NAME_ATTR);
+     $namespace = AvroUtil::array_value($avro, self::NAMESPACE_ATTR);
+     $new_name = new AvroName($name, $namespace, $default_namespace);
+     $doc = AvroUtil::array_value($avro, self::DOC_ATTR);
+     switch ($type)
+     {
+       case self::FIXED_SCHEMA:
+         $size = AvroUtil::array_value($avro, self::SIZE_ATTR);
+         return new AvroFixedSchema($new_name, $doc, $size, $schemata);
+       case self::ENUM_SCHEMA:
+         $symbols = AvroUtil::array_value($avro, self::SYMBOLS_ATTR);
+         return new AvroEnumSchema($new_name, $doc, $symbols, $schemata);
+       case self::RECORD_SCHEMA:
+       case self::ERROR_SCHEMA:
+         $fields = AvroUtil::array_value($avro, self::FIELDS_ATTR);
+         return new AvroRecordSchema($new_name, $doc, $fields,
+                                     $schemata, $type);
+       default:
+         throw new AvroSchemaParseException(
+           sprintf('Unknown named type: %s', $type));
+     }
+   }
+
+   if (self::is_valid_type($type))
+   {
+     switch ($type)
+     {
+       case self::ARRAY_SCHEMA:
+         // Read through array_value(), like every other attribute here,
+         // so a missing "items" key becomes a parse error in the
+         // sub-schema instead of an undefined-index warning.
+         $items = AvroUtil::array_value($avro, self::ITEMS_ATTR);
+         return new AvroArraySchema($items, $default_namespace, $schemata);
+       case self::MAP_SCHEMA:
+         $values = AvroUtil::array_value($avro, self::VALUES_ATTR);
+         return new AvroMapSchema($values, $default_namespace, $schemata);
+       default:
+         throw new AvroSchemaParseException(
+           sprintf('Unknown valid type: %s', $type));
+     }
+   }
+
+   // A list with no "type" key is the JSON form of a union.
+   if (!array_key_exists(self::TYPE_ATTR, $avro) && AvroUtil::is_list($avro))
+     return new AvroUnionSchema($avro, $default_namespace, $schemata);
+
+   // print_r() keeps the message meaningful when "type" is itself an array.
+   throw new AvroSchemaParseException(sprintf('Undefined type: %s',
+                                              print_r($type, true)));
+ }
+
+ /**
+  * Checks whether a PHP value conforms to the given schema.
+  *
+  * Arrays, maps, unions and records are validated recursively. Enum
+  * symbols are compared strictly, so only a string identical to one of
+  * the schema's symbols is accepted (a loose comparison would accept
+  * e.g. boolean true for any enum).
+  * @param AvroSchema $expected_schema schema the datum is checked against
+  * @param mixed $datum value to validate
+  * @returns boolean true if $datum is valid for $expected_schema
+  *                  and false otherwise.
+  * @throws AvroSchemaParseException if the schema type is not one of the
+  *                                  types handled here
+  */
+ public static function is_valid_datum($expected_schema, $datum)
+ {
+   switch ($expected_schema->type)
+   {
+     case self::NULL_TYPE:
+       return is_null($datum);
+     case self::BOOLEAN_TYPE:
+       return is_bool($datum);
+     case self::STRING_TYPE:
+     case self::BYTES_TYPE:
+       return is_string($datum);
+     case self::INT_TYPE:
+       return (is_int($datum)
+               && (self::INT_MIN_VALUE <= $datum)
+               && ($datum <= self::INT_MAX_VALUE));
+     case self::LONG_TYPE:
+       return (is_int($datum)
+               && (self::LONG_MIN_VALUE <= $datum)
+               && ($datum <= self::LONG_MAX_VALUE));
+     case self::FLOAT_TYPE:
+     case self::DOUBLE_TYPE:
+       return (is_float($datum) || is_int($datum));
+     case self::ARRAY_SCHEMA:
+       if (!is_array($datum))
+         return false;
+       foreach ($datum as $d)
+       {
+         if (!self::is_valid_datum($expected_schema->items(), $d))
+           return false;
+       }
+       return true;
+     case self::MAP_SCHEMA:
+       if (!is_array($datum))
+         return false;
+       foreach ($datum as $k => $v)
+       {
+         // Map keys must be strings.
+         if (!is_string($k)
+             || !self::is_valid_datum($expected_schema->values(), $v))
+           return false;
+       }
+       return true;
+     case self::UNION_SCHEMA:
+       // Valid if the datum matches at least one branch of the union.
+       foreach ($expected_schema->schemas() as $schema)
+       {
+         if (self::is_valid_datum($schema, $datum))
+           return true;
+       }
+       return false;
+     case self::ENUM_SCHEMA:
+       return in_array($datum, $expected_schema->symbols(), true);
+     case self::FIXED_SCHEMA:
+       return (is_string($datum)
+               && (strlen($datum) == $expected_schema->size()));
+     case self::RECORD_SCHEMA:
+     case self::ERROR_SCHEMA:
+     case self::REQUEST_SCHEMA:
+       if (!is_array($datum))
+         return false;
+       foreach ($expected_schema->fields() as $field)
+       {
+         // Every field must be present in the datum and valid for its type.
+         if (!array_key_exists($field->name(), $datum)
+             || !self::is_valid_datum($field->type(), $datum[$field->name()]))
+           return false;
+       }
+       return true;
+     default:
+       throw new AvroSchemaParseException(
+         sprintf('%s is not allowed.', $expected_schema));
+   }
+ }
+
+ /**
+  * @var string schema type name of this schema.
+  * Declared explicitly so that assigning it in the constructor does not
+  * create a dynamic property (deprecated as of PHP 8.2). It stays public
+  * because other code in this file reads it directly, e.g.
+  * AvroSchema::is_valid_datum() and AvroUnionSchema::__construct().
+  */
+ public $type;
+
+ /**
+  * @internal Should only be called from within the constructor of
+  *           a class which extends AvroSchema
+  * @param string $type a schema type name
+  */
+ public function __construct($type)
+ {
+   $this->type = $type;
+ }
+
+ /**
+  * Parses a nested (sub-)schema via real_parse().
+  *
+  * Parse exceptions are passed through unchanged; any other Exception
+  * is replaced by an AvroSchemaParseException that shows the offending
+  * sub-schema.
+  * @param mixed $avro JSON-decoded sub-schema
+  * @param string $default_namespace namespace of enclosing schema
+  * @param AvroNamedSchemata &$schemata
+  * @returns AvroSchema
+  * @uses AvroSchema::real_parse()
+  * @throws AvroSchemaParseException
+  */
+ protected static function subparse($avro, $default_namespace, &$schemata=null)
+ {
+   try
+   {
+     $sub_schema = self::real_parse($avro, $default_namespace, $schemata);
+   }
+   catch (AvroSchemaParseException $parse_error)
+   {
+     // Already the right exception type: rethrow as is.
+     throw $parse_error;
+   }
+   catch (Exception $other_error)
+   {
+     $message = sprintf('Sub-schema is not a valid Avro schema. Bad schema: %s',
+                        print_r($avro, true));
+     throw new AvroSchemaParseException($message);
+   }
+
+   return $sub_schema;
+ }
+
+ /**
+  * @returns string schema type name of this schema
+  */
+ public function type()
+ {
+   return $this->type;
+ }
+
+ /**
+  * Produces the JSON-encodable form of this schema. At this level
+  * that is an array holding only the "type" attribute.
+  * @returns mixed
+  */
+ public function to_avro()
+ {
+   $avro = array();
+   $avro[self::TYPE_ATTR] = $this->type;
+   return $avro;
+ }
+
+ /**
+  * @returns string the JSON-encoded representation of this Avro schema.
+  */
+ public function __toString()
+ {
+   $avro = $this->to_avro();
+   return json_encode($avro);
+ }
+
+ /**
+  * Looks up an attribute by calling the method of the same name
+  * on this object.
+  * @param string $attribute name of the accessor method to call
+  * @returns mixed value of the attribute with the given attribute name
+  */
+ public function attribute($attribute)
+ {
+   $value = $this->$attribute();
+   return $value;
+ }
+
+}
+
+/**
+ * Avro schema for the primitive types: null, boolean, string, bytes,
+ * int, long, float and double.
+ * @package Avro
+ */
+class AvroPrimitiveSchema extends AvroSchema
+{
+
+ /**
+  * @param string $type the primitive schema type name
+  * @throws AvroSchemaParseException if the given $type is not a
+  *         primitive schema type name
+  */
+ public function __construct($type)
+ {
+   if (!self::is_primitive_type($type))
+     throw new AvroSchemaParseException(
+       sprintf('%s is not a valid primitive type.', $type));
+
+   parent::__construct($type);
+ }
+
+ /**
+  * A primitive schema is encoded as its bare type name rather than
+  * as an array with a "type" attribute.
+  * @returns mixed
+  */
+ public function to_avro()
+ {
+   $avro = parent::to_avro();
+   // The parent result holds only the "type" attribute, so the bare type
+   // name is returned; the array is returned only if it ever holds more.
+   return (1 == count($avro)) ? $this->type : $avro;
+ }
+}
+
+/**
+ * Avro array schema, consisting of items of a particular
+ * Avro schema type.
+ * @package Avro
+ */
+class AvroArraySchema extends AvroSchema
+{
+ /**
+  * @var AvroSchema schema of the array elements: either a named schema
+  *      looked up in the schemata registry or a freshly parsed schema
+  */
+ private $items;
+
+ /**
+  * @var boolean true if the items schema was found by name in the
+  *      schemata registry rather than parsed from an inline definition.
+  *      to_avro() then emits the schema's qualified name instead of
+  *      its full definition.
+  */
+ private $is_items_schema_from_schemata;
+
+ /**
+  * @param string|mixed $items AvroNamedSchema name or object form
+  *        of decoded JSON schema representation.
+  * @param string $default_namespace namespace of enclosing schema
+  * @param AvroNamedSchemata &$schemata registry of named schemas; may be
+  *        null, in which case no name lookup is attempted
+  * @throws AvroSchemaParseException if $items is not a valid schema
+  */
+ public function __construct($items, $default_namespace, &$schemata=null)
+ {
+   parent::__construct(AvroSchema::ARRAY_SCHEMA);
+
+   // A string may name an already-defined schema. The registry is only
+   // consulted when one was supplied: the parameter defaults to null and
+   // calling a method on null would be a fatal error.
+   $items_schema = null;
+   if (is_string($items) && !is_null($schemata))
+     $items_schema = $schemata->schema_by_name(
+       new AvroName($items, null, $default_namespace));
+
+   $this->is_items_schema_from_schemata = !is_null($items_schema);
+
+   if (is_null($items_schema))
+     $items_schema = AvroSchema::subparse($items, $default_namespace, $schemata);
+
+   $this->items = $items_schema;
+ }
+
+
+ /**
+  * @returns AvroSchema schema of this array schema's elements.
+  */
+ public function items() { return $this->items; }
+
+ /**
+  * @returns mixed array with the "type" and "items" attributes; a named
+  *          items schema taken from the registry is emitted by name.
+  */
+ public function to_avro()
+ {
+   $avro = parent::to_avro();
+   $avro[AvroSchema::ITEMS_ATTR] = $this->is_items_schema_from_schemata
+     ? $this->items->qualified_name() : $this->items->to_avro();
+   return $avro;
+ }
+}
+
+/**
+ * Avro map schema consisting of named values of defined
+ * Avro Schema types.
+ * @package Avro
+ */
+class AvroMapSchema extends AvroSchema
+{
+ /**
+  * @var AvroSchema schema of the map values: either a named schema
+  *      looked up in the schemata registry or a freshly parsed schema
+  */
+ private $values;
+
+ /**
+  * @var boolean true if the values schema was found by name in the
+  *      schemata registry rather than parsed from an inline definition.
+  *      to_avro() then emits the schema's qualified name instead of
+  *      its full definition.
+  */
+ private $is_values_schema_from_schemata;
+
+ /**
+  * @param string|mixed $values AvroNamedSchema name or decoded JSON
+  *        schema representation of the map values
+  * @param string $default_namespace namespace of enclosing schema
+  * @param AvroNamedSchemata &$schemata registry of named schemas; may be
+  *        null, in which case no name lookup is attempted
+  * @throws AvroSchemaParseException if $values is not a valid schema
+  */
+ public function __construct($values, $default_namespace, &$schemata=null)
+ {
+   parent::__construct(AvroSchema::MAP_SCHEMA);
+
+   // A string may name an already-defined schema. The registry is only
+   // consulted when one was supplied: the parameter defaults to null and
+   // calling a method on null would be a fatal error.
+   $values_schema = null;
+   if (is_string($values) && !is_null($schemata))
+     $values_schema = $schemata->schema_by_name(
+       new AvroName($values, null, $default_namespace));
+
+   $this->is_values_schema_from_schemata = !is_null($values_schema);
+
+   if (is_null($values_schema))
+     $values_schema = AvroSchema::subparse($values, $default_namespace,
+                                           $schemata);
+
+   $this->values = $values_schema;
+ }
+
+ /**
+  * @returns AvroSchema schema of this map schema's values
+  */
+ public function values() { return $this->values; }
+
+ /**
+  * @returns mixed array with the "type" and "values" attributes; a named
+  *          values schema taken from the registry is emitted by name.
+  */
+ public function to_avro()
+ {
+   $avro = parent::to_avro();
+   $avro[AvroSchema::VALUES_ATTR] = $this->is_values_schema_from_schemata
+     ? $this->values->qualified_name() : $this->values->to_avro();
+   return $avro;
+ }
+}
+
+/**
+ * Union of Avro schemas, of which values can be of any of the schema in
+ * the union.
+ * @package Avro
+ */
+class AvroUnionSchema extends AvroSchema
+{
+ /**
+  * @var AvroSchema[] list of schemas of this union
+  */
+ private $schemas;
+
+ /**
+  * @var int[] list of indices of named schemas which
+  *      are defined in $schemata
+  */
+ public $schema_from_schemata_indices;
+
+ /**
+  * @param mixed[] $schemas list of the union's branches, each either a
+  *        schema name string or a decoded JSON schema definition
+  * @param string $default_namespace namespace of enclosing schema
+  * @param AvroNamedSchemata &$schemata registry of named schemas; may be
+  *        null, in which case no name lookup is attempted
+  * @throws AvroSchemaParseException if a branch is invalid, if an unnamed
+  *         type occurs twice, or if a branch is itself a union
+  */
+ public function __construct($schemas, $default_namespace, &$schemata=null)
+ {
+   parent::__construct(AvroSchema::UNION_SCHEMA);
+
+   // Initialise explicitly so an empty union yields an empty list
+   // rather than null from schemas() and schema_by_index().
+   $this->schemas = array();
+   $this->schema_from_schemata_indices = array();
+   $schema_types = array();
+   foreach ($schemas as $index => $schema)
+   {
+     // A string may name an already-defined schema; only consult the
+     // registry when one was supplied.
+     $new_schema = null;
+     if (is_string($schema) && !is_null($schemata))
+       $new_schema = $schemata->schema_by_name(
+         new AvroName($schema, null, $default_namespace));
+
+     $is_schema_from_schemata = !is_null($new_schema);
+     if (!$is_schema_from_schemata)
+       $new_schema = self::subparse($schema, $default_namespace, $schemata);
+
+     $schema_type = $new_schema->type();
+
+     // Unnamed types may occur at most once in a union.
+     if (self::is_valid_type($schema_type)
+         && !self::is_named_type($schema_type)
+         && in_array($schema_type, $schema_types, true))
+       throw new AvroSchemaParseException(
+         sprintf('"%s" is already in union', $schema_type));
+
+     if (AvroSchema::UNION_SCHEMA == $schema_type)
+       throw new AvroSchemaParseException('Unions cannot contain other unions');
+
+     $schema_types []= $schema_type;
+     $this->schemas []= $new_schema;
+     if ($is_schema_from_schemata)
+       $this->schema_from_schemata_indices []= $index;
+   }
+ }
+
+ /**
+  * @returns AvroSchema[]
+  */
+ public function schemas() { return $this->schemas; }
+
+ /**
+  * @param int $index zero-based index of the wanted branch
+  * @returns AvroSchema the particular schema from the union for
+  *          the given (zero-based) index.
+  * @throws AvroSchemaParseException if the index is invalid for this schema.
+  */
+ public function schema_by_index($index)
+ {
+   // Reject negative indices as well as indices past the end.
+   if (0 <= $index && $index < count($this->schemas))
+     return $this->schemas[$index];
+
+   throw new AvroSchemaParseException('Invalid union schema index');
+ }
+
+ /**
+  * @returns mixed list of the branches' Avro forms; branches that were
+  *          taken from the registry are emitted by qualified name.
+  */
+ public function to_avro()
+ {
+   $avro = array();
+
+   foreach ($this->schemas as $index => $schema)
+     $avro []= in_array($index, $this->schema_from_schemata_indices, true)
+       ? $schema->qualified_name() : $schema->to_avro();
+
+   return $avro;
+ }
+}
+
+/**
+ * Base class of the Avro schemas that carry a name
+ * (an AvroName) and an optional doc string.
+ * @package Avro
+ * @todo Refactor AvroNamedSchema to use an AvroName instance
+ *       to store name information.
+ */
+class AvroNamedSchema extends AvroSchema
+{
+ /**
+  * @var AvroName $name
+  */
+ private $name;
+
+ /**
+  * @var string documentation string
+  */
+ private $doc;
+
+ /**
+  * Stores name and doc and, when a schemata registry is given, replaces
+  * the caller's registry with a copy that also contains this schema.
+  * @param string $type
+  * @param AvroName $name
+  * @param string $doc documentation string
+  * @param AvroNamedSchemata &$schemata
+  * @throws AvroSchemaParseException
+  */
+ public function __construct($type, $name, $doc=null, &$schemata=null)
+ {
+   $doc_is_invalid = $doc && !is_string($doc);
+   if ($doc_is_invalid)
+     throw new AvroSchemaParseException('Schema doc attribute must be a string');
+
+   parent::__construct($type);
+   $this->name = $name;
+   $this->doc = $doc;
+
+   if (is_null($schemata))
+     return;
+
+   // The name must be set by now: registration reads fullname().
+   $schemata = $schemata->clone_with_new_schema($this);
+ }
+
+ /**
+  * @returns mixed array with "type" and "name", plus "namespace" and
+  *          "doc" when present
+  */
+ public function to_avro()
+ {
+   $avro = parent::to_avro();
+
+   $name_parts = AvroName::extract_namespace($this->qualified_name());
+   $simple_name = $name_parts[0];
+   $namespace = $name_parts[1];
+
+   $avro[AvroSchema::NAME_ATTR] = $simple_name;
+   if ($namespace)
+     $avro[AvroSchema::NAMESPACE_ATTR] = $namespace;
+   if (null !== $this->doc)
+     $avro[AvroSchema::DOC_ATTR] = $this->doc;
+
+   return $avro;
+ }
+
+ /**
+  * @returns string fullname of this schema's AvroName
+  */
+ public function fullname()
+ {
+   return $this->name->fullname();
+ }
+
+ /**
+  * @returns string qualified name of this schema's AvroName
+  */
+ public function qualified_name()
+ {
+   return $this->name->qualified_name();
+ }
+
+}
+
+/**
+ * Name of a named Avro schema: simple name, namespace, fullname and
+ * the name qualified relative to a default namespace.
+ * @package Avro
+ */
+class AvroName
+{
+ /**
+  * @var string character used to separate names comprising the fullname
+  */
+ const NAME_SEPARATOR = '.';
+
+ /**
+  * @var string regular expression to validate name values
+  */
+ const NAME_REGEXP = '/^[A-Za-z_][A-Za-z0-9_]*$/';
+
+ /**
+  * Splits a possibly dotted name into its last component and the
+  * namespace in front of it. A name without separator is returned
+  * together with the $namespace argument.
+  * @returns string[] array($name, $namespace)
+  */
+ public static function extract_namespace($name, $namespace=null)
+ {
+   $components = explode(self::NAME_SEPARATOR, $name);
+   if (count($components) <= 1)
+     return array($name, $namespace);
+
+   $simple_name = array_pop($components);
+   return array($simple_name, implode(self::NAME_SEPARATOR, $components));
+ }
+
+ /**
+  * @returns boolean true if the given name is well-formed
+  *          (a non-empty string matching self::NAME_REGEXP)
+  *          and false otherwise
+  */
+ public static function is_well_formed_name($name)
+ {
+   if (!is_string($name) || empty($name))
+     return false;
+
+   return (bool) preg_match(self::NAME_REGEXP, $name);
+ }
+
+ /**
+  * @param string $namespace
+  * @returns boolean true if namespace is composed of valid names
+  * @throws AvroSchemaParseException if any of the namespace components
+  *         are invalid.
+  */
+ private static function check_namespace_names($namespace)
+ {
+   $components = explode(self::NAME_SEPARATOR, $namespace);
+   foreach ($components as $component)
+   {
+     $is_valid = !empty($component)
+       && (0 != preg_match(self::NAME_REGEXP, $component));
+     if (!$is_valid)
+       throw new AvroSchemaParseException(sprintf('Invalid name "%s"', $component));
+   }
+   return true;
+ }
+
+ /**
+  * Joins a namespace and a simple name into a fullname after
+  * validating the namespace.
+  * @param string $name
+  * @param string $namespace
+  * @returns string
+  * @throws AvroSchemaParseException if any of the names are not valid.
+  */
+ private static function parse_fullname($name, $namespace)
+ {
+   $has_namespace = is_string($namespace) && !empty($namespace);
+   if (!$has_namespace)
+     throw new AvroSchemaParseException('Namespace must be a non-empty string.');
+
+   self::check_namespace_names($namespace);
+   return $namespace . self::NAME_SEPARATOR . $name;
+ }
+
+ /**
+  * @var string valid names are matched by self::NAME_REGEXP
+  */
+ private $name;
+
+ /**
+  * @var string
+  */
+ private $namespace;
+
+ /**
+  * @var string
+  */
+ private $fullname;
+
+ /**
+  * @var string Name qualified as necessary given its default namespace.
+  */
+ private $qualified_name;
+
+ /**
+  * A dotted $name is taken as the fullname. Otherwise the name is
+  * combined with $namespace or, failing that, $default_namespace.
+  * @param string $name
+  * @param string $namespace
+  * @param string $default_namespace
+  * @throws AvroSchemaParseException if the name or namespace is invalid
+  */
+ public function __construct($name, $namespace, $default_namespace)
+ {
+   if (!is_string($name) || empty($name))
+     throw new AvroSchemaParseException('Name must be a non-empty string.');
+
+   if (strpos($name, self::NAME_SEPARATOR))
+   {
+     // Already a fullname: every dotted component must be a valid name.
+     self::check_namespace_names($name);
+     $fullname = $name;
+   }
+   else
+   {
+     if (0 == preg_match(self::NAME_REGEXP, $name))
+       throw new AvroSchemaParseException(sprintf('Invalid name "%s"', $name));
+
+     // An explicit namespace wins over the enclosing default namespace.
+     $enclosing = !is_null($namespace) ? $namespace : $default_namespace;
+     $fullname = is_null($enclosing)
+       ? $name : self::parse_fullname($name, $enclosing);
+   }
+   $this->fullname = $fullname;
+
+   $parts = self::extract_namespace($this->fullname);
+   $this->name = $parts[0];
+   $this->namespace = $parts[1];
+
+   // The simple name suffices when there is no namespace or the namespace
+   // is the default one; otherwise the fullname is needed.
+   if (is_null($this->namespace) || $this->namespace == $default_namespace)
+     $this->qualified_name = $this->name;
+   else
+     $this->qualified_name = $this->fullname;
+ }
+
+ /**
+  * @returns array array($name, $namespace)
+  */
+ public function name_and_namespace()
+ {
+   return array($this->name, $this->namespace);
+ }
+
+ /**
+  * @returns string
+  */
+ public function fullname()
+ {
+   return $this->fullname;
+ }
+
+ /**
+  * @returns string fullname
+  * @uses $this->fullname()
+  */
+ public function __toString()
+ {
+   return $this->fullname();
+ }
+
+ /**
+  * @returns string name qualified for its context
+  */
+ public function qualified_name()
+ {
+   return $this->qualified_name;
+ }
+
+}
+
+/**
+ * Registry of the AvroNamedSchema objects observed so far,
+ * keyed by fullname.
+ *
+ * @package Avro
+ */
+class AvroNamedSchemata
+{
+ /**
+  * @var AvroNamedSchema[] named schemas keyed by their fullname
+  */
+ private $schemata;
+
+ /**
+  * @param AvroNamedSchema[] $schemata initial map of fullname to schema
+  */
+ public function __construct($schemata=array())
+ {
+   $this->schemata = $schemata;
+ }
+
+ /**
+  * Debugging aid: dumps the registry and then prints every
+  * registered schema on its own line.
+  */
+ public function list_schemas()
+ {
+   var_export($this->schemata);
+   foreach ($this->schemata as $registered)
+   {
+     print('Schema '.$registered->__toString()."\n");
+   }
+ }
+
+ /**
+  * @param string $fullname
+  * @returns boolean true if there exists a schema with the given name
+  *          and false otherwise.
+  */
+ public function has_name($fullname)
+ {
+   $is_known = array_key_exists($fullname, $this->schemata);
+   return $is_known;
+ }
+
+ /**
+  * @param string $fullname
+  * @returns AvroSchema|null the schema which has the given name,
+  *          or null if there is no schema with the given name.
+  */
+ public function schema($fullname)
+ {
+   return isset($this->schemata[$fullname])
+     ? $this->schemata[$fullname] : null;
+ }
+
+ /**
+  * @param AvroName $name
+  * @returns AvroSchema|null the schema registered under the fullname
+  *          of $name, or null if there is none
+  */
+ public function schema_by_name($name)
+ {
+   $fullname = $name->fullname();
+   return $this->schema($fullname);
+ }
+
+ /**
+  * Creates a new AvroNamedSchemata instance holding everything in this
+  * instance plus the given $schema; this instance is left unchanged.
+  * @param AvroNamedSchema $schema schema to add to this existing schemata
+  * @returns AvroNamedSchemata
+  * @throws AvroSchemaParseException if the schema's fullname is a type
+  *         name or is already registered
+  */
+ public function clone_with_new_schema($schema)
+ {
+   $name = $schema->fullname();
+
+   if (AvroSchema::is_valid_type($name))
+     throw new AvroSchemaParseException(
+       sprintf('Name "%s" is a reserved type name', $name));
+
+   if ($this->has_name($name))
+     throw new AvroSchemaParseException(
+       sprintf('Name "%s" is already in use', $name));
+
+   $extended = $this->schemata;
+   $extended[$name] = $schema;
+   return new AvroNamedSchemata($extended);
+ }
+}
+
+/**
+ * Avro enum schema: a named schema with a list of unique string symbols.
+ * @package Avro
+ */
+class AvroEnumSchema extends AvroNamedSchema
+{
+ /**
+  * @var string[] array of symbols
+  */
+ private $symbols;
+
+ /**
+  * @param AvroName $name
+  * @param string $doc
+  * @param string[] $symbols
+  * @param AvroNamedSchemata &$schemata
+  * @throws AvroSchemaParseException if $symbols is not a list, contains
+  *         a non-string or empty symbol, or contains duplicates
+  */
+ public function __construct($name, $doc, $symbols, &$schemata=null)
+ {
+   if (!AvroUtil::is_list($symbols))
+     throw new AvroSchemaParseException('Enum Schema symbols are not a list');
+
+   // Check element types first so that the duplicate check below only
+   // ever compares strings.
+   foreach ($symbols as $symbol)
+   {
+     if (!is_string($symbol) || empty($symbol))
+       throw new AvroSchemaParseException(
+         sprintf('Enum schema symbol must be a string %s',
+                 print_r($symbol, true)));
+   }
+
+   // Duplicates exist exactly when de-duplication shortens the list.
+   $unique_symbols = array_unique($symbols);
+   if (count($unique_symbols) < count($symbols))
+   {
+     $duplicates = array_unique(array_diff_key($symbols, $unique_symbols));
+     throw new AvroSchemaParseException(
+       sprintf('Duplicate symbols: %s', implode(', ', $duplicates)));
+   }
+
+   parent::__construct(AvroSchema::ENUM_SCHEMA, $name, $doc, $schemata);
+   $this->symbols = $symbols;
+ }
+
+ /**
+  * @returns string[] this enum schema's symbols
+  */
+ public function symbols() { return $this->symbols; }
+
+ /**
+  * Strict comparison, consistent with symbol_index().
+  * @param string $symbol
+  * @returns boolean true if the given symbol exists in this
+  *          enum schema and false otherwise
+  */
+ public function has_symbol($symbol)
+ {
+   return in_array($symbol, $this->symbols, true);
+ }
+
+ /**
+  * @param int $index
+  * @returns string enum schema symbol with the given (zero-based) index
+  * @throws AvroException if there is no symbol at the given index
+  */
+ public function symbol_by_index($index)
+ {
+   if (array_key_exists($index, $this->symbols))
+     return $this->symbols[$index];
+   throw new AvroException(sprintf('Invalid symbol index %d', $index));
+ }
+
+ /**
+  * @param string $symbol
+  * @returns int the index of the given $symbol in the enum schema
+  * @throws AvroException if the symbol is not part of this enum schema
+  */
+ public function symbol_index($symbol)
+ {
+   $idx = array_search($symbol, $this->symbols, true);
+   if (false !== $idx)
+     return $idx;
+   throw new AvroException(sprintf("Invalid symbol value '%s'", $symbol));
+ }
+
+ /**
+  * @returns mixed the named-schema attributes plus the "symbols" list
+  */
+ public function to_avro()
+ {
+   $avro = parent::to_avro();
+   $avro[AvroSchema::SYMBOLS_ATTR] = $this->symbols;
+   return $avro;
+ }
+}
+
+/**
+ * AvroNamedSchema with fixed-length data values
+ * @package Avro
+ */
+class AvroFixedSchema extends AvroNamedSchema
+{
+
+ /**
+  * @var int byte count of this fixed schema data value
+  */
+ private $size;
+
+ /**
+  * @param AvroName $name
+  * @param string $doc ignored: fixed schemas don't have doc strings,
+  *        so null is always passed on to the parent constructor
+  * @param int $size byte count of this fixed schema data value;
+  *        must be a non-negative integer
+  * @param AvroNamedSchemata &$schemata
+  * @throws AvroSchemaParseException if $size is not a non-negative integer
+  */
+ public function __construct($name, $doc, $size, &$schemata=null)
+ {
+   // A negative byte count can never match any datum
+   // (see AvroSchema::is_valid_datum()), so reject it at parse time.
+   if (!is_int($size) || $size < 0)
+     throw new AvroSchemaParseException(
+       'Fixed Schema requires a valid integer for "size" attribute');
+
+   // Fixed schemas don't have doc strings.
+   parent::__construct(AvroSchema::FIXED_SCHEMA, $name, null, $schemata);
+   $this->size = $size;
+ }
+
+ /**
+  * @returns int byte count of this fixed schema data value
+  */
+ public function size() { return $this->size; }
+
+ /**
+  * @returns mixed the named-schema attributes plus the "size" attribute
+  */
+ public function to_avro()
+ {
+   $avro = parent::to_avro();
+   $avro[AvroSchema::SIZE_ATTR] = $this->size;
+   return $avro;
+ }
+}
+
+/**
+ * Avro record schema: a named schema made up of a list of fields.
+ * Also used for the "error" and "request" schema types.
+ * @package Avro
+ */
+class AvroRecordSchema extends AvroNamedSchema
+{
+ /**
+  * Builds the AvroField objects for a record from the decoded "fields"
+  * attribute. A field type given as a string is first looked up by name
+  * in $schemata; anything else is parsed as an inline schema.
+  * @param mixed $field_data list of decoded JSON field definitions
+  * @param string $default_namespace namespace of enclosing schema
+  * @param AvroNamedSchemata &$schemata registry of named schemas; may be
+  *        null, in which case no name lookup is attempted
+  * @returns AvroField[]
+  * @throws AvroSchemaParseException if $field_data or one of its entries
+  *         is not an array, if a field name is used twice, or if a field
+  *         type is not a valid schema
+  */
+ public static function parse_fields($field_data, $default_namespace, &$schemata)
+ {
+   if (!is_array($field_data))
+     throw new AvroSchemaParseException(
+       'Record schema fields attribute must be an array');
+
+   $fields = array();
+   $field_names = array();
+   foreach ($field_data as $field)
+   {
+     if (!is_array($field))
+       throw new AvroSchemaParseException(
+         sprintf('Record field definition must be an array: %s',
+                 print_r($field, true)));
+
+     $name = AvroUtil::array_value($field, AvroField::FIELD_NAME_ATTR);
+     $type = AvroUtil::array_value($field, AvroSchema::TYPE_ATTR);
+     $order = AvroUtil::array_value($field, AvroField::ORDER_ATTR);
+
+     // array_key_exists() rather than isset(): an explicit null default
+     // still counts as "has a default".
+     $has_default = array_key_exists(AvroField::DEFAULT_ATTR, $field);
+     $default = $has_default ? $field[AvroField::DEFAULT_ATTR] : null;
+
+     if (in_array($name, $field_names, true))
+       throw new AvroSchemaParseException(
+         sprintf("Field name %s is already in use", $name));
+
+     // A string type may name an already-defined schema; only consult
+     // the registry when one was supplied.
+     $field_schema = null;
+     if (is_string($type) && !is_null($schemata))
+       $field_schema = $schemata->schema_by_name(
+         new AvroName($type, null, $default_namespace));
+
+     $is_schema_from_schemata = !is_null($field_schema);
+     if (!$is_schema_from_schemata)
+       $field_schema = self::subparse($type, $default_namespace, $schemata);
+
+     $new_field = new AvroField($name, $field_schema, $is_schema_from_schemata,
+                                $has_default, $default, $order);
+     $field_names []= $name;
+     $fields []= $new_field;
+   }
+   return $fields;
+ }
+
+ /**
+  * @var AvroField[] field definitions of this AvroRecordSchema,
+  *      as built by parse_fields()
+  */
+ private $fields;
+
+ /**
+  * @var array map of field names to field objects.
+  * @internal Not called directly. Memoization of AvroRecordSchema->fields_hash()
+  */
+ private $fields_hash;
+
+ /**
+  * @param AvroName $name
+  * @param string $doc
+  * @param array $fields list of decoded JSON field definitions
+  * @param AvroNamedSchemata &$schemata
+  * @param string $schema_type schema type name
+  * @throws AvroSchemaParseException
+  */
+ public function __construct($name, $doc, $fields, &$schemata=null,
+                             $schema_type=AvroSchema::RECORD_SCHEMA)
+ {
+   if (is_null($fields))
+     throw new AvroSchemaParseException(
+       'Record schema requires a non-empty fields attribute');
+
+   // Reject a malformed attribute before the name is registered.
+   if (!is_array($fields))
+     throw new AvroSchemaParseException(
+       'Record schema fields attribute must be an array');
+
+   // Request schemas are constructed without doc and are not
+   // registered in $schemata.
+   if (AvroSchema::REQUEST_SCHEMA == $schema_type)
+     parent::__construct($schema_type, $name);
+   else
+     parent::__construct($schema_type, $name, $doc, $schemata);
+
+   // The record's own namespace is the default namespace of its fields.
+   $name_parts = $name->name_and_namespace();
+   $namespace = $name_parts[1];
+   $this->fields = self::parse_fields($fields, $namespace, $schemata);
+ }
+
+ /**
+  * @returns mixed the named-schema attributes plus "fields"; for a
+  *          request schema only the list of fields is returned
+  */
+ public function to_avro()
+ {
+   $avro = parent::to_avro();
+
+   $fields_avro = array();
+   foreach ($this->fields as $field)
+     $fields_avro []= $field->to_avro();
+
+   if (AvroSchema::REQUEST_SCHEMA == $this->type)
+     return $fields_avro;
+
+   $avro[AvroSchema::FIELDS_ATTR] = $fields_avro;
+
+   return $avro;
+ }
+
+ /**
+  * @returns array the schema definitions of the fields of this AvroRecordSchema
+  */
+ public function fields() { return $this->fields; }
+
+ /**
+  * @returns array a hash table of the fields of this AvroRecordSchema fields
+  *          keyed by each field's name (built on first use, then cached)
+  */
+ public function fields_hash()
+ {
+   if (is_null($this->fields_hash))
+   {
+     $hash = array();
+     foreach ($this->fields as $field)
+       $hash[$field->name()] = $field;
+     $this->fields_hash = $hash;
+   }
+   return $this->fields_hash;
+ }
+}
+
+/**
+ * Field of an {@link AvroRecordSchema}
+ * @package Avro
+ */
+class AvroField extends AvroSchema
+{
+
+  /**
+   * @var string fields name attribute name
+   */
+  const FIELD_NAME_ATTR = 'name';
+
+  /**
+   * @var string fields default attribute name
+   */
+  const DEFAULT_ATTR = 'default';
+
+  /**
+   * @var string fields sort order attribute name
+   */
+  const ORDER_ATTR = 'order';
+
+  /**
+   * @var string
+   */
+  const ASC_SORT_ORDER = 'ascending';
+
+  /**
+   * @var string
+   */
+  const DESC_SORT_ORDER = 'descending';
+
+  /**
+   * @var string
+   */
+  const IGNORE_SORT_ORDER = 'ignore';
+
+  /**
+   * @var array list of valid field sort order values
+   */
+  private static $valid_field_sort_orders = array(self::ASC_SORT_ORDER,
+                                                  self::DESC_SORT_ORDER,
+                                                  self::IGNORE_SORT_ORDER);
+
+
+  /**
+   * @param mixed $order
+   * @returns boolean true if $order is exactly one of the valid sort order
+   *                  strings
+   */
+  private static function is_valid_field_sort_order($order)
+  {
+    // Strict comparison: a loose in_array() would accept non-string values
+    // such as true, which compare equal to any non-empty string.
+    return in_array($order, self::$valid_field_sort_orders, true);
+  }
+
+  /**
+   * @param mixed $order null (no order given) or a sort order value
+   * @throws AvroSchemaParseException if $order is not a valid
+   *         field order value.
+   */
+  private static function check_order_value($order)
+  {
+    if (!is_null($order) && !self::is_valid_field_sort_order($order))
+      throw new AvroSchemaParseException(
+        sprintf('Invalid field sort order %s', $order));
+  }
+
+  /**
+   * @var string
+   */
+  private $name;
+
+  /**
+   * @var boolean whether or not there is a default value
+   */
+  private $has_default;
+
+  /**
+   * @var mixed field default value; only assigned when $has_default is true
+   */
+  private $default;
+
+  /**
+   * @var string sort order of this field, or null if none was given
+   */
+  private $order;
+
+  /**
+   * @var boolean whether or not the AvroNamedSchema of this field is
+   *              defined in the AvroNamedSchemata instance
+   */
+  private $is_type_from_schemata;
+
+  /**
+   * @param string $name
+   * @param AvroSchema $schema schema of the values of this field
+   * @param boolean $is_type_from_schemata
+   * @param boolean $has_default whether $default carries a default value
+   * @param mixed $default default value; ignored unless $has_default is true
+   * @param string $order one of the *_SORT_ORDER constants, or null
+   * @throws AvroSchemaParseException if $name is not a well-formed name or
+   *         $order is not a valid sort order
+   * @todo Check validity of $default value
+   */
+  public function __construct($name, $schema, $is_type_from_schemata,
+                              $has_default, $default, $order=null)
+  {
+    if (!AvroName::is_well_formed_name($name))
+      throw new AvroSchemaParseException('Field requires a "name" attribute');
+
+    // check_order_value() is static, so call it statically.
+    self::check_order_value($order);
+
+    // The field's schema is kept in the type property (not declared in
+    // this class).
+    $this->type = $schema;
+    $this->is_type_from_schemata = $is_type_from_schemata;
+    $this->name = $name;
+    $this->has_default = $has_default;
+    if ($this->has_default)
+      $this->default = $default;
+    $this->order = $order;
+  }
+
+  /**
+   * @returns mixed array with the field name, its type (the qualified name
+   *                when the type came from the named schemata, the
+   *                serialized schema otherwise) and, when present, the
+   *                default value and sort order
+   */
+  public function to_avro()
+  {
+    $avro = array(AvroField::FIELD_NAME_ATTR => $this->name);
+
+    $avro[AvroSchema::TYPE_ATTR] = ($this->is_type_from_schemata)
+      ? $this->type->qualified_name() : $this->type->to_avro();
+
+    // Use the has_default flag rather than isset(): isset() is false for
+    // null, which would silently drop an explicit null default.
+    if ($this->has_default)
+      $avro[AvroField::DEFAULT_ATTR] = $this->default;
+
+    if ($this->order)
+      $avro[AvroField::ORDER_ATTR] = $this->order;
+
+    return $avro;
+  }
+
+  /**
+   * @returns string the name of this field
+   */
+  public function name() { return $this->name; }
+
+  /**
+   * @returns mixed the default value of this field
+   */
+  public function default_value() { return $this->default; }
+
+  /**
+   * @returns boolean true if the field has a default and false otherwise
+   */
+  public function has_default_value() { return $this->has_default; }
+}