www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - How to erase chars from char[]?

reply Ben Hanson <Ben.Hanson tfbplc.co.uk> writes:
I've changed the code to use CharT[] again, which simplified things
substantially. However, I can't find a way to erase characters from a char[].
Can anyone help?

See the current code below.

Thanks,

Ben

module main;

import std.algorithm;
import std.string;

template regex(CharT)
{
struct basic_string_token
{
	bool _negated = false;
	CharT[] _charset;
	enum size_t MAX_CHARS = CharT.max + 1;
	enum size_t START_CHAR = cast(CharT) 0x80 < 0 ? 0x80 : 0;

	this(const bool negated_, ref CharT[] charset_)
	{
		_negated = negated_;
		_charset = charset_;
	}

	void remove_duplicates()
	{
		_charset.sort;
		_charset = squeeze(_charset.idup).dup;
	}

	void normalise()
	{
		if (_charset.length == MAX_CHARS)
		{
			_negated = !_negated;
			_charset.clear();
		}
		else if (_charset.length > MAX_CHARS / 2)
		{
			negate();
		}
	}

	void negate()
	{
		CharT curr_char_ = START_CHAR;
		CharT[] temp_;
		CharT *ptr_;
		CharT *curr_ = _charset.ptr;
		CharT *end_ = curr_ + _charset.length;
		size_t i_ = 0;

		_negated = !_negated;
		temp_.length = MAX_CHARS - _charset.length;
		ptr_ = temp_.ptr;

		while (curr_ < end_)
		{
			while (*curr_ > curr_char_)
			{
				*ptr_ = curr_char_;
				++ptr_;
				++curr_char_;
				++i_;
			}

			++curr_char_;
			++curr_;
			++i_;
		}

		for (; i_ < MAX_CHARS; ++i_)
		{
			*ptr_ = curr_char_;
			++ptr_;
			++curr_char_;
		}

		_charset = temp_;
	}

	bool empty()
	{
		return _charset.length == 0 && !_negated;
	}

	bool any()
	{
		return _charset.length == 0 && _negated;
	}

	void clear()
	{
		_negated = false;
		_charset.length = 0;
	}

	void intersect(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
			!any () && !rhs_.any ()))
		{
			intersect_same_types (rhs_, overlap_);
		}
		else
		{
			intersect_diff_types (rhs_, overlap_);
		}
	}

private:
	void intersect_same_types (ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (any ())
		{
			clear ();
			overlap_._negated = true;
			rhs_.clear ();
		}
		else
		{
			CharT *iter_ = _charset.ptr;
			CharT *end_ = iter_ + _charset.length;
			CharT *rhs_iter_ = rhs_._charset.ptr;
			CharT *rhs_end_ = rhs_iter_ + rhs_._charset.length;

			overlap_._negated = _negated;

			while (iter_ != end_ && rhs_iter_ != rhs_end_)
			{
				if (*iter_ < *rhs_iter_)
				{
					++iter_;
				}
				else if (*iter_ > *rhs_iter_)
				{
					++rhs_iter_;
				}
				else
				{
					overlap_._charset ~= *iter_;
					_charset.erase(i_);
					iter_ = _charset.ptr;
					end_ = iter_ + _charset.length;
					rhs_._charset.erase(rhs_i_);
					rhs_iter_ = rhs_._charset.ptr;
					rhs_end_ = rhs_iter_ + rhs_._charset.length;
				}
			}

			if (_negated)
			{
			}
			else if (!overlap_._charset.length == 0)
			{
				normalise ();
				overlap_.normalise ();
				rhs_.normalise ();
			}
		}
	}

	void intersect_diff_types(ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (any ())
		{
			intersect_any(rhs_, overlap_);
		}
		else if (_negated)
		{
			intersect_negated(rhs_, overlap_);
		}
		else // _negated == false
		{
			intersect_charset(rhs_, overlap_);
		}
	}

	void intersect_any (ref basic_string_token rhs_, ref basic_string_token
overlap_)
	{
		if (rhs_._negated)
		{
			rhs_.intersect_negated (this, overlap_);
		}
		else // rhs._negated == false
		{
			rhs_.intersect_charset (this, overlap_);
		}
	}

	void intersect_negated (ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (rhs_.any ())
		{
			overlap_._negated = true;
			overlap_._charset = _charset;
			rhs_._negated = false;
			rhs_._charset = _charset;
			clear ();
		}
		else // rhs._negated == false
		{
			rhs_.intersect_charset (this, overlap_);
		}
	}

	void intersect_charset (ref basic_string_token rhs_,
		ref basic_string_token overlap_)
	{
		if (rhs_.any ())
		{
			overlap_._charset = _charset;
			rhs_._negated = true;
			rhs_._charset = _charset;
			clear ();
		}
		else // rhs_._negated == true
		{
		}
	}
};
}

int main(char[][]argv)
{
	regex!(char).basic_string_token token_;

	token_._charset = "cccbba";
	token_.remove_duplicates();
	token_.negate();
	return 0;
}
Jun 21 2010
next sibling parent bearophile <bearophileHUGS lycos.com> writes:
Ben Hanson:
 However, I can't find a way to erase characters from a char[].
 Can anyone help?
If you need to delete the last chars you can just decrease the length. If you need to delete chars in the middle you can copy items with memmove() and then decrease the length. You can also write a function to do it, this is just a rough starting point for such function: import std.range: hasAssignableElements; import std.c.string: memmove; /** this doesn't work with user-defined arrays it can be made more general, able to work on Random Access Ranges can can shrink It can contain a static if that tells apart true D arrays, and uses memmove() on them, from generic Random Access Ranges, that need a for loop to copy items. */ void remove(T)(ref R[] items, size_t start, size_t stop) if (hasAssignableElements!R) in { assert(stop => start); assert(items.length >= start); assert(items.length >= stop); } out { // assert(items.length <= old.items.length); // not doable yet } body { if (stop == items.length) items.length = start; else { memmove(...); items.length = ... } } void main() { char[] s = "012345678".dup; s.remove(2, 4); assert(s == "..."); } Bye, bearophile
Jun 21 2010
prev sibling parent Andrei Alexandrescu <SeeWebsiteForEmail erdani.org> writes:
On 06/21/2010 06:43 AM, Ben Hanson wrote:
 I've changed the code to use CharT[] again, which simplified things
 substantially. However, I can't find a way to erase characters from a char[].
 Can anyone help?
http://www.digitalmars.com/d/2.0/phobos/std_array.html#replace Andrei
Jun 21 2010